diff --git a/CMakeLists.txt b/CMakeLists.txt index 463718b31c76..aad902f95373 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -445,7 +445,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/lib/std/math/big.zig" "${CMAKE_SOURCE_DIR}/lib/std/math/big/int.zig" "${CMAKE_SOURCE_DIR}/lib/std/math/float.zig" - "${CMAKE_SOURCE_DIR}/lib/std/math/floor.zig" "${CMAKE_SOURCE_DIR}/lib/std/math/frexp.zig" "${CMAKE_SOURCE_DIR}/lib/std/math/isinf.zig" "${CMAKE_SOURCE_DIR}/lib/std/math/isnan.zig" @@ -482,20 +481,40 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/absv.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/addXf3.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/addo.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/arm.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/atomics.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/aulldiv.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/aullrem.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/bswap.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/ceil.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/clear_cache.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/cmp.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/compareXf2.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/cos.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/count0bits.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/divdf3.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/divsf3.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/divtf3.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/divti3.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/divxf3.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/emutls.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/exp.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/exp2.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/extendXfYf2.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/extend_f80.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/fabs.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/fixXfYi.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/floatXiYf.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/floor.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/fma.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/fmax.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/fmin.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/fmod.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/int.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/log.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/log10.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/log2.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/modti3.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/mulXf3.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/muldi3.zig" @@ -507,9 +526,22 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/os_version_check.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/parity.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/popcount.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/rem_pio2.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/rem_pio2_large.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/rem_pio2f.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/round.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/shift.zig" 
+ "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/sin.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/sincos.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/sparc.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/sqrt.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/stack_probe.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/subo.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/tan.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/trig.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/trunc.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/truncXfYf2.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/trunc_f80.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/udivmod.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/udivmodti4.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/udivti3.zig" diff --git a/doc/langref.html.in b/doc/langref.html.in index 5cccced446e1..3c5de6c8d28a 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -8026,7 +8026,7 @@ fn func(y: *i32) void { only rounds once, and is thus more accurate.

- Supports Floats and Vectors of floats. + Supports {#link|Floats#} and {#link|Vectors#} of floats.

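For concreteness, a minimal sketch (illustrative only, not part of this diff) of the fused behaviour described above, assuming plain IEEE f32 arithmetic:

const std = @import("std");

test "@mulAdd sketch" {
    // Computes a * b + c with a single rounding step.
    const x = @mulAdd(f32, 2.0, 3.0, 4.0);
    try std.testing.expect(x == 10.0); // exactly representable, so == is safe
}
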
{#header_close#} @@ -9440,6 +9440,7 @@ fn doTheTest() !void { some float operations are not yet implemented for all float types.

{#header_close#} + {#header_open|@cos#}
{#syntax#}@cos(value: anytype) @TypeOf(value){#endsyntax#}

@@ -9451,6 +9452,19 @@ fn doTheTest() !void { some float operations are not yet implemented for all float types.

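A minimal usage sketch (illustrative only, not part of this diff) of the builtin documented above:

const std = @import("std");

test "@cos sketch" {
    // cos(0) is exactly 1.0, so an exact comparison is safe here.
    try std.testing.expect(@cos(@as(f32, 0.0)) == 1.0);
}
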
{#header_close#} + + {#header_open|@tan#} +
{#syntax#}@tan(value: anytype) @TypeOf(value){#endsyntax#}
+

+ Tangent trigonometric function on a floating point number. + Uses a dedicated hardware instruction when available. +

+

+ Supports {#link|Floats#} and {#link|Vectors#} of floats, with the caveat that + some float operations are not yet implemented for all float types. +

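By way of a quick illustration (not part of this diff), the new builtin in a small test; per the caveat above, the vector form is omitted here since not every float type is wired up on every backend yet:

const std = @import("std");

test "@tan sketch" {
    // tan(0) is exactly 0.0, so an exact comparison is safe.
    try std.testing.expect(@tan(@as(f64, 0.0)) == 0.0);
}
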
+ {#header_close#} + {#header_open|@exp#}
{#syntax#}@exp(value: anytype) @TypeOf(value){#endsyntax#}

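Closing out the langref additions, a hedged sketch (not part of this diff) pairing @exp with its inverse @log, the same builtins the std.math changes below migrate to:

const std = @import("std");

test "@exp sketch" {
    // exp(0) is exactly 1.0; @log(@exp(x)) recovers x up to rounding error.
    try std.testing.expect(@exp(@as(f64, 0.0)) == 1.0);
    const x: f64 = 2.5;
    try std.testing.expect(std.math.approxEqAbs(f64, @log(@exp(x)), x, 1e-12));
}
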
diff --git a/lib/std/fmt/errol.zig b/lib/std/fmt/errol.zig index 29dd2b7a631f..1ce72de0fc64 100644 --- a/lib/std/fmt/errol.zig +++ b/lib/std/fmt/errol.zig @@ -113,7 +113,7 @@ fn errolSlow(val: f64, buffer: []u8) FloatDecimal { // normalize the midpoint const e = math.frexp(val).exponent; - var exp = @floatToInt(i16, math.floor(307 + @intToFloat(f64, e) * 0.30103)); + var exp = @floatToInt(i16, @floor(307 + @intToFloat(f64, e) * 0.30103)); if (exp < 20) { exp = 20; } else if (@intCast(usize, exp) >= lookup_table.len) { @@ -170,10 +170,10 @@ fn errolSlow(val: f64, buffer: []u8) FloatDecimal { // digit generation var buf_index: usize = 0; while (true) { - var hdig = @floatToInt(u8, math.floor(high.val)); + var hdig = @floatToInt(u8, @floor(high.val)); if ((high.val == @intToFloat(f64, hdig)) and (high.off < 0)) hdig -= 1; - var ldig = @floatToInt(u8, math.floor(low.val)); + var ldig = @floatToInt(u8, @floor(low.val)); if ((low.val == @intToFloat(f64, ldig)) and (low.off < 0)) ldig -= 1; if (ldig != hdig) break; @@ -187,7 +187,7 @@ fn errolSlow(val: f64, buffer: []u8) FloatDecimal { } const tmp = (high.val + low.val) / 2.0; - var mdig = @floatToInt(u8, math.floor(tmp + 0.5)); + var mdig = @floatToInt(u8, @floor(tmp + 0.5)); if ((@intToFloat(f64, mdig) - tmp) == 0.5 and (mdig & 0x1) != 0) mdig -= 1; buffer[buf_index] = mdig + '0'; diff --git a/lib/std/json.zig b/lib/std/json.zig index c18f38754aaf..b670e488b22a 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -1655,7 +1655,7 @@ fn parseInternal( if (numberToken.is_integer) return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10); const float = try std.fmt.parseFloat(f128, numberToken.slice(tokens.slice, tokens.i - 1)); - if (std.math.round(float) != float) return error.InvalidNumber; + if (@round(float) != float) return error.InvalidNumber; if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow; return @floatToInt(T, float); }, diff --git a/lib/std/math.zig b/lib/std/math.zig index b229c8973eaf..214ade39ce56 100644 --- a/lib/std/math.zig +++ b/lib/std/math.zig @@ -138,7 +138,7 @@ pub fn approxEqAbs(comptime T: type, x: T, y: T, tolerance: T) bool { if (isNan(x) or isNan(y)) return false; - return fabs(x - y) <= tolerance; + return @fabs(x - y) <= tolerance; } /// Performs an approximate comparison of two floating point values `x` and `y`. 
@@ -166,7 +166,7 @@ pub fn approxEqRel(comptime T: type, x: T, y: T, tolerance: T) bool { if (isNan(x) or isNan(y)) return false; - return fabs(x - y) <= max(fabs(x), fabs(y)) * tolerance; + return @fabs(x - y) <= max(@fabs(x), @fabs(y)) * tolerance; } pub fn approxEq(comptime T: type, x: T, y: T, tolerance: T) bool { @@ -233,11 +233,6 @@ pub fn raiseDivByZero() void { pub const isNan = @import("math/isnan.zig").isNan; pub const isSignalNan = @import("math/isnan.zig").isSignalNan; -pub const fabs = @import("math/fabs.zig").fabs; -pub const ceil = @import("math/ceil.zig").ceil; -pub const floor = @import("math/floor.zig").floor; -pub const trunc = @import("math/trunc.zig").trunc; -pub const round = @import("math/round.zig").round; pub const frexp = @import("math/frexp.zig").frexp; pub const Frexp = @import("math/frexp.zig").Frexp; pub const modf = @import("math/modf.zig").modf; @@ -261,8 +256,6 @@ pub const asin = @import("math/asin.zig").asin; pub const atan = @import("math/atan.zig").atan; pub const atan2 = @import("math/atan2.zig").atan2; pub const hypot = @import("math/hypot.zig").hypot; -pub const exp = @import("math/exp.zig").exp; -pub const exp2 = @import("math/exp2.zig").exp2; pub const expm1 = @import("math/expm1.zig").expm1; pub const ilogb = @import("math/ilogb.zig").ilogb; pub const ln = @import("math/ln.zig").ln; @@ -270,16 +263,12 @@ pub const log = @import("math/log.zig").log; pub const log2 = @import("math/log2.zig").log2; pub const log10 = @import("math/log10.zig").log10; pub const log1p = @import("math/log1p.zig").log1p; -pub const fma = @import("math/fma.zig").fma; pub const asinh = @import("math/asinh.zig").asinh; pub const acosh = @import("math/acosh.zig").acosh; pub const atanh = @import("math/atanh.zig").atanh; pub const sinh = @import("math/sinh.zig").sinh; pub const cosh = @import("math/cosh.zig").cosh; pub const tanh = @import("math/tanh.zig").tanh; -pub const cos = @import("math/cos.zig").cos; -pub const sin = @import("math/sin.zig").sin; -pub const tan = @import("math/tan.zig").tan; pub const complex = @import("math/complex.zig"); pub const Complex = complex.Complex; @@ -716,17 +705,6 @@ fn testAbsInt() !void { try testing.expect((absInt(@as(i32, 10)) catch unreachable) == 10); } -pub const absFloat = fabs; - -test "absFloat" { - try testAbsFloat(); - comptime try testAbsFloat(); -} -fn testAbsFloat() !void { - try testing.expect(absFloat(@as(f32, -10.05)) == 10.05); - try testing.expect(absFloat(@as(f32, 10.05)) == 10.05); -} - /// Divide numerator by denominator, rounding toward zero. Returns an /// error on overflow or when denominator is zero. pub fn divTrunc(comptime T: type, numerator: T, denominator: T) !T { @@ -1400,11 +1378,6 @@ test "order.compare" { try testing.expect(order(1, 0).compare(.neq)); } -test "comptime sin and ln" { - const v = comptime (sin(@as(f32, 1)) + ln(@as(f32, 5))); - try testing.expect(v == sin(@as(f32, 1)) + ln(@as(f32, 5))); -} - /// Returns a mask of all ones if value is true, /// and a mask of all zeroes if value is false. /// Compiles to one instruction for register sized integers. 
diff --git a/lib/std/math/acos.zig b/lib/std/math/acos.zig index b90ba9c78ee6..e88bed72277b 100644 --- a/lib/std/math/acos.zig +++ b/lib/std/math/acos.zig @@ -64,14 +64,14 @@ fn acos32(x: f32) f32 { // x < -0.5 if (hx >> 31 != 0) { const z = (1 + x) * 0.5; - const s = math.sqrt(z); + const s = @sqrt(z); const w = r32(z) * s - pio2_lo; return 2 * (pio2_hi - (s + w)); } // x > 0.5 const z = (1.0 - x) * 0.5; - const s = math.sqrt(z); + const s = @sqrt(z); const jx = @bitCast(u32, s); const df = @bitCast(f32, jx & 0xFFFFF000); const c = (z - df * df) / (s + df); @@ -133,14 +133,14 @@ fn acos64(x: f64) f64 { // x < -0.5 if (hx >> 31 != 0) { const z = (1.0 + x) * 0.5; - const s = math.sqrt(z); + const s = @sqrt(z); const w = r64(z) * s - pio2_lo; return 2 * (pio2_hi - (s + w)); } // x > 0.5 const z = (1.0 - x) * 0.5; - const s = math.sqrt(z); + const s = @sqrt(z); const jx = @bitCast(u64, s); const df = @bitCast(f64, jx & 0xFFFFFFFF00000000); const c = (z - df * df) / (s + df); diff --git a/lib/std/math/acosh.zig b/lib/std/math/acosh.zig index e42f4fd5d3eb..a78130d2ef07 100644 --- a/lib/std/math/acosh.zig +++ b/lib/std/math/acosh.zig @@ -29,15 +29,15 @@ fn acosh32(x: f32) f32 { // |x| < 2, invalid if x < 1 or nan if (i < 0x3F800000 + (1 << 23)) { - return math.log1p(x - 1 + math.sqrt((x - 1) * (x - 1) + 2 * (x - 1))); + return math.log1p(x - 1 + @sqrt((x - 1) * (x - 1) + 2 * (x - 1))); } // |x| < 0x1p12 else if (i < 0x3F800000 + (12 << 23)) { - return math.ln(2 * x - 1 / (x + math.sqrt(x * x - 1))); + return @log(2 * x - 1 / (x + @sqrt(x * x - 1))); } // |x| >= 0x1p12 else { - return math.ln(x) + 0.693147180559945309417232121458176568; + return @log(x) + 0.693147180559945309417232121458176568; } } @@ -47,15 +47,15 @@ fn acosh64(x: f64) f64 { // |x| < 2, invalid if x < 1 or nan if (e < 0x3FF + 1) { - return math.log1p(x - 1 + math.sqrt((x - 1) * (x - 1) + 2 * (x - 1))); + return math.log1p(x - 1 + @sqrt((x - 1) * (x - 1) + 2 * (x - 1))); } // |x| < 0x1p26 else if (e < 0x3FF + 26) { - return math.ln(2 * x - 1 / (x + math.sqrt(x * x - 1))); + return @log(2 * x - 1 / (x + @sqrt(x * x - 1))); } // |x| >= 0x1p26 or nan else { - return math.ln(x) + 0.693147180559945309417232121458176568; + return @log(x) + 0.693147180559945309417232121458176568; } } diff --git a/lib/std/math/asin.zig b/lib/std/math/asin.zig index 0849fac72e1e..48ad04c579cb 100644 --- a/lib/std/math/asin.zig +++ b/lib/std/math/asin.zig @@ -60,8 +60,8 @@ fn asin32(x: f32) f32 { } // 1 > |x| >= 0.5 - const z = (1 - math.fabs(x)) * 0.5; - const s = math.sqrt(z); + const z = (1 - @fabs(x)) * 0.5; + const s = @sqrt(z); const fx = pio2 - 2 * (s + s * r32(z)); if (hx >> 31 != 0) { @@ -119,8 +119,8 @@ fn asin64(x: f64) f64 { } // 1 > |x| >= 0.5 - const z = (1 - math.fabs(x)) * 0.5; - const s = math.sqrt(z); + const z = (1 - @fabs(x)) * 0.5; + const s = @sqrt(z); const r = r64(z); var fx: f64 = undefined; diff --git a/lib/std/math/asinh.zig b/lib/std/math/asinh.zig index 8717ebbb66d0..65028ef5d9dc 100644 --- a/lib/std/math/asinh.zig +++ b/lib/std/math/asinh.zig @@ -39,15 +39,15 @@ fn asinh32(x: f32) f32 { // |x| >= 0x1p12 or inf or nan if (i >= 0x3F800000 + (12 << 23)) { - rx = math.ln(rx) + 0.69314718055994530941723212145817656; + rx = @log(rx) + 0.69314718055994530941723212145817656; } // |x| >= 2 else if (i >= 0x3F800000 + (1 << 23)) { - rx = math.ln(2 * x + 1 / (math.sqrt(x * x + 1) + x)); + rx = @log(2 * x + 1 / (@sqrt(x * x + 1) + x)); } // |x| >= 0x1p-12, up to 1.6ulp error else if (i >= 0x3F800000 - (12 << 23)) { - rx = math.log1p(x + x 
* x / (math.sqrt(x * x + 1) + 1)); + rx = math.log1p(x + x * x / (@sqrt(x * x + 1) + 1)); } // |x| < 0x1p-12, inexact if x != 0 else { @@ -70,15 +70,15 @@ fn asinh64(x: f64) f64 { // |x| >= 0x1p26 or inf or nan if (e >= 0x3FF + 26) { - rx = math.ln(rx) + 0.693147180559945309417232121458176568; + rx = @log(rx) + 0.693147180559945309417232121458176568; } // |x| >= 2 else if (e >= 0x3FF + 1) { - rx = math.ln(2 * x + 1 / (math.sqrt(x * x + 1) + x)); + rx = @log(2 * x + 1 / (@sqrt(x * x + 1) + x)); } // |x| >= 0x1p-12, up to 1.6ulp error else if (e >= 0x3FF - 26) { - rx = math.log1p(x + x * x / (math.sqrt(x * x + 1) + 1)); + rx = math.log1p(x + x * x / (@sqrt(x * x + 1) + 1)); } // |x| < 0x1p-12, inexact if x != 0 else { diff --git a/lib/std/math/atan.zig b/lib/std/math/atan.zig index c67e6fe8e092..3a13d943e897 100644 --- a/lib/std/math/atan.zig +++ b/lib/std/math/atan.zig @@ -73,7 +73,7 @@ fn atan32(x_: f32) f32 { } id = null; } else { - x = math.fabs(x); + x = @fabs(x); // |x| < 1.1875 if (ix < 0x3F980000) { // 7/16 <= |x| < 11/16 @@ -171,7 +171,7 @@ fn atan64(x_: f64) f64 { } id = null; } else { - x = math.fabs(x); + x = @fabs(x); // |x| < 1.1875 if (ix < 0x3FF30000) { // 7/16 <= |x| < 11/16 diff --git a/lib/std/math/atan2.zig b/lib/std/math/atan2.zig index d440d65e0458..b9b37e7da424 100644 --- a/lib/std/math/atan2.zig +++ b/lib/std/math/atan2.zig @@ -108,7 +108,7 @@ fn atan2_32(y: f32, x: f32) f32 { if ((m & 2) != 0 and iy + (26 << 23) < ix) { break :z 0.0; } else { - break :z math.atan(math.fabs(y / x)); + break :z math.atan(@fabs(y / x)); } }; @@ -198,7 +198,7 @@ fn atan2_64(y: f64, x: f64) f64 { if ((m & 2) != 0 and iy +% (64 << 20) < ix) { break :z 0.0; } else { - break :z math.atan(math.fabs(y / x)); + break :z math.atan(@fabs(y / x)); } }; diff --git a/lib/std/math/complex.zig b/lib/std/math/complex.zig index 42342faa3ee7..2fd1cf15a1b0 100644 --- a/lib/std/math/complex.zig +++ b/lib/std/math/complex.zig @@ -115,7 +115,7 @@ pub fn Complex(comptime T: type) type { /// Returns the magnitude of a complex number. 
pub fn magnitude(self: Self) T { - return math.sqrt(self.re * self.re + self.im * self.im); + return @sqrt(self.re * self.re + self.im * self.im); } }; } diff --git a/lib/std/math/complex/atan.zig b/lib/std/math/complex/atan.zig index 484b41edf534..929b98aebd22 100644 --- a/lib/std/math/complex/atan.zig +++ b/lib/std/math/complex/atan.zig @@ -66,7 +66,7 @@ fn atan32(z: Complex(f32)) Complex(f32) { t = y + 1.0; a = (x2 + (t * t)) / a; - return Complex(f32).init(w, 0.25 * math.ln(a)); + return Complex(f32).init(w, 0.25 * @log(a)); } fn redupif64(x: f64) f64 { @@ -115,7 +115,7 @@ fn atan64(z: Complex(f64)) Complex(f64) { t = y + 1.0; a = (x2 + (t * t)) / a; - return Complex(f64).init(w, 0.25 * math.ln(a)); + return Complex(f64).init(w, 0.25 * @log(a)); } const epsilon = 0.0001; diff --git a/lib/std/math/complex/cosh.zig b/lib/std/math/complex/cosh.zig index 46f7a714a23a..65cfc4a52830 100644 --- a/lib/std/math/complex/cosh.zig +++ b/lib/std/math/complex/cosh.zig @@ -38,25 +38,25 @@ fn cosh32(z: Complex(f32)) Complex(f32) { } // small x: normal case if (ix < 0x41100000) { - return Complex(f32).init(math.cosh(x) * math.cos(y), math.sinh(x) * math.sin(y)); + return Complex(f32).init(math.cosh(x) * @cos(y), math.sinh(x) * @sin(y)); } // |x|>= 9, so cosh(x) ~= exp(|x|) if (ix < 0x42b17218) { // x < 88.7: exp(|x|) won't overflow - const h = math.exp(math.fabs(x)) * 0.5; - return Complex(f32).init(math.copysign(f32, h, x) * math.cos(y), h * math.sin(y)); + const h = @exp(@fabs(x)) * 0.5; + return Complex(f32).init(math.copysign(f32, h, x) * @cos(y), h * @sin(y)); } // x < 192.7: scale to avoid overflow else if (ix < 0x4340b1e7) { - const v = Complex(f32).init(math.fabs(x), y); + const v = Complex(f32).init(@fabs(x), y); const r = ldexp_cexp(v, -1); return Complex(f32).init(r.re, r.im * math.copysign(f32, 1, x)); } // x >= 192.7: result always overflows else { const h = 0x1p127 * x; - return Complex(f32).init(h * h * math.cos(y), h * math.sin(y)); + return Complex(f32).init(h * h * @cos(y), h * @sin(y)); } } @@ -79,7 +79,7 @@ fn cosh32(z: Complex(f32)) Complex(f32) { if (iy >= 0x7f800000) { return Complex(f32).init(x * x, x * (y - y)); } - return Complex(f32).init((x * x) * math.cos(y), x * math.sin(y)); + return Complex(f32).init((x * x) * @cos(y), x * @sin(y)); } return Complex(f32).init((x * x) * (y - y), (x + x) * (y - y)); @@ -106,25 +106,25 @@ fn cosh64(z: Complex(f64)) Complex(f64) { } // small x: normal case if (ix < 0x40360000) { - return Complex(f64).init(math.cosh(x) * math.cos(y), math.sinh(x) * math.sin(y)); + return Complex(f64).init(math.cosh(x) * @cos(y), math.sinh(x) * @sin(y)); } // |x|>= 22, so cosh(x) ~= exp(|x|) if (ix < 0x40862e42) { // x < 710: exp(|x|) won't overflow - const h = math.exp(math.fabs(x)) * 0.5; - return Complex(f64).init(h * math.cos(y), math.copysign(f64, h, x) * math.sin(y)); + const h = @exp(@fabs(x)) * 0.5; + return Complex(f64).init(h * @cos(y), math.copysign(f64, h, x) * @sin(y)); } // x < 1455: scale to avoid overflow else if (ix < 0x4096bbaa) { - const v = Complex(f64).init(math.fabs(x), y); + const v = Complex(f64).init(@fabs(x), y); const r = ldexp_cexp(v, -1); return Complex(f64).init(r.re, r.im * math.copysign(f64, 1, x)); } // x >= 1455: result always overflows else { const h = 0x1p1023; - return Complex(f64).init(h * h * math.cos(y), h * math.sin(y)); + return Complex(f64).init(h * h * @cos(y), h * @sin(y)); } } @@ -147,7 +147,7 @@ fn cosh64(z: Complex(f64)) Complex(f64) { if (iy >= 0x7ff00000) { return Complex(f64).init(x * x, x * (y - y)); } - 
return Complex(f64).init(x * x * math.cos(y), x * math.sin(y)); + return Complex(f64).init(x * x * @cos(y), x * @sin(y)); } return Complex(f64).init((x * x) * (y - y), (x + x) * (y - y)); diff --git a/lib/std/math/complex/exp.zig b/lib/std/math/complex/exp.zig index ce25025ded77..84ee251d0e0f 100644 --- a/lib/std/math/complex/exp.zig +++ b/lib/std/math/complex/exp.zig @@ -33,13 +33,13 @@ fn exp32(z: Complex(f32)) Complex(f32) { const hy = @bitCast(u32, y) & 0x7fffffff; // cexp(x + i0) = exp(x) + i0 if (hy == 0) { - return Complex(f32).init(math.exp(x), y); + return Complex(f32).init(@exp(x), y); } const hx = @bitCast(u32, x); // cexp(0 + iy) = cos(y) + isin(y) if ((hx & 0x7fffffff) == 0) { - return Complex(f32).init(math.cos(y), math.sin(y)); + return Complex(f32).init(@cos(y), @sin(y)); } if (hy >= 0x7f800000) { @@ -63,8 +63,8 @@ fn exp32(z: Complex(f32)) Complex(f32) { // - x = +-inf // - x = nan else { - const exp_x = math.exp(x); - return Complex(f32).init(exp_x * math.cos(y), exp_x * math.sin(y)); + const exp_x = @exp(x); + return Complex(f32).init(exp_x * @cos(y), exp_x * @sin(y)); } } @@ -81,7 +81,7 @@ fn exp64(z: Complex(f64)) Complex(f64) { // cexp(x + i0) = exp(x) + i0 if (hy | ly == 0) { - return Complex(f64).init(math.exp(x), y); + return Complex(f64).init(@exp(x), y); } const fx = @bitCast(u64, x); @@ -90,7 +90,7 @@ fn exp64(z: Complex(f64)) Complex(f64) { // cexp(0 + iy) = cos(y) + isin(y) if ((hx & 0x7fffffff) | lx == 0) { - return Complex(f64).init(math.cos(y), math.sin(y)); + return Complex(f64).init(@cos(y), @sin(y)); } if (hy >= 0x7ff00000) { @@ -114,13 +114,13 @@ fn exp64(z: Complex(f64)) Complex(f64) { // - x = +-inf // - x = nan else { - const exp_x = math.exp(x); - return Complex(f64).init(exp_x * math.cos(y), exp_x * math.sin(y)); + const exp_x = @exp(x); + return Complex(f64).init(exp_x * @cos(y), exp_x * @sin(y)); } } test "complex.cexp32" { - const tolerance_f32 = math.sqrt(math.floatEps(f32)); + const tolerance_f32 = @sqrt(math.floatEps(f32)); { const a = Complex(f32).init(5, 3); @@ -140,7 +140,7 @@ test "complex.cexp32" { } test "complex.cexp64" { - const tolerance_f64 = math.sqrt(math.floatEps(f64)); + const tolerance_f64 = @sqrt(math.floatEps(f64)); { const a = Complex(f64).init(5, 3); diff --git a/lib/std/math/complex/ldexp.zig b/lib/std/math/complex/ldexp.zig index db710a043869..c196d4afe6df 100644 --- a/lib/std/math/complex/ldexp.zig +++ b/lib/std/math/complex/ldexp.zig @@ -26,7 +26,7 @@ fn frexp_exp32(x: f32, expt: *i32) f32 { const k = 235; // reduction constant const kln2 = 162.88958740; // k * ln2 - const exp_x = math.exp(x - kln2); + const exp_x = @exp(x - kln2); const hx = @bitCast(u32, exp_x); // TODO zig should allow this cast implicitly because it should know the value is in range expt.* = @intCast(i32, hx >> 23) - (0x7f + 127) + k; @@ -45,8 +45,8 @@ fn ldexp_cexp32(z: Complex(f32), expt: i32) Complex(f32) { const scale2 = @bitCast(f32, (0x7f + half_expt2) << 23); return Complex(f32).init( - math.cos(z.im) * exp_x * scale1 * scale2, - math.sin(z.im) * exp_x * scale1 * scale2, + @cos(z.im) * exp_x * scale1 * scale2, + @sin(z.im) * exp_x * scale1 * scale2, ); } @@ -54,7 +54,7 @@ fn frexp_exp64(x: f64, expt: *i32) f64 { const k = 1799; // reduction constant const kln2 = 1246.97177782734161156; // k * ln2 - const exp_x = math.exp(x - kln2); + const exp_x = @exp(x - kln2); const fx = @bitCast(u64, exp_x); const hx = @intCast(u32, fx >> 32); @@ -78,7 +78,7 @@ fn ldexp_cexp64(z: Complex(f64), expt: i32) Complex(f64) { const scale2 = @bitCast(f64, (0x3ff 
+ half_expt2) << (20 + 32)); return Complex(f64).init( - math.cos(z.im) * exp_x * scale1 * scale2, - math.sin(z.im) * exp_x * scale1 * scale2, + @cos(z.im) * exp_x * scale1 * scale2, + @sin(z.im) * exp_x * scale1 * scale2, ); } diff --git a/lib/std/math/complex/log.zig b/lib/std/math/complex/log.zig index 90c51058cffe..6d1b06d2720d 100644 --- a/lib/std/math/complex/log.zig +++ b/lib/std/math/complex/log.zig @@ -10,7 +10,7 @@ pub fn log(z: anytype) Complex(@TypeOf(z.re)) { const r = cmath.abs(z); const phi = cmath.arg(z); - return Complex(T).init(math.ln(r), phi); + return Complex(T).init(@log(r), phi); } const epsilon = 0.0001; diff --git a/lib/std/math/complex/sinh.zig b/lib/std/math/complex/sinh.zig index 851af3e62e3a..1569565ecc3c 100644 --- a/lib/std/math/complex/sinh.zig +++ b/lib/std/math/complex/sinh.zig @@ -38,25 +38,25 @@ fn sinh32(z: Complex(f32)) Complex(f32) { } // small x: normal case if (ix < 0x41100000) { - return Complex(f32).init(math.sinh(x) * math.cos(y), math.cosh(x) * math.sin(y)); + return Complex(f32).init(math.sinh(x) * @cos(y), math.cosh(x) * @sin(y)); } // |x|>= 9, so cosh(x) ~= exp(|x|) if (ix < 0x42b17218) { // x < 88.7: exp(|x|) won't overflow - const h = math.exp(math.fabs(x)) * 0.5; - return Complex(f32).init(math.copysign(f32, h, x) * math.cos(y), h * math.sin(y)); + const h = @exp(@fabs(x)) * 0.5; + return Complex(f32).init(math.copysign(f32, h, x) * @cos(y), h * @sin(y)); } // x < 192.7: scale to avoid overflow else if (ix < 0x4340b1e7) { - const v = Complex(f32).init(math.fabs(x), y); + const v = Complex(f32).init(@fabs(x), y); const r = ldexp_cexp(v, -1); return Complex(f32).init(r.re * math.copysign(f32, 1, x), r.im); } // x >= 192.7: result always overflows else { const h = 0x1p127 * x; - return Complex(f32).init(h * math.cos(y), h * h * math.sin(y)); + return Complex(f32).init(h * @cos(y), h * h * @sin(y)); } } @@ -79,7 +79,7 @@ fn sinh32(z: Complex(f32)) Complex(f32) { if (iy >= 0x7f800000) { return Complex(f32).init(x * x, x * (y - y)); } - return Complex(f32).init(x * math.cos(y), math.inf(f32) * math.sin(y)); + return Complex(f32).init(x * @cos(y), math.inf(f32) * @sin(y)); } return Complex(f32).init((x * x) * (y - y), (x + x) * (y - y)); @@ -105,25 +105,25 @@ fn sinh64(z: Complex(f64)) Complex(f64) { } // small x: normal case if (ix < 0x40360000) { - return Complex(f64).init(math.sinh(x) * math.cos(y), math.cosh(x) * math.sin(y)); + return Complex(f64).init(math.sinh(x) * @cos(y), math.cosh(x) * @sin(y)); } // |x|>= 22, so cosh(x) ~= exp(|x|) if (ix < 0x40862e42) { // x < 710: exp(|x|) won't overflow - const h = math.exp(math.fabs(x)) * 0.5; - return Complex(f64).init(math.copysign(f64, h, x) * math.cos(y), h * math.sin(y)); + const h = @exp(@fabs(x)) * 0.5; + return Complex(f64).init(math.copysign(f64, h, x) * @cos(y), h * @sin(y)); } // x < 1455: scale to avoid overflow else if (ix < 0x4096bbaa) { - const v = Complex(f64).init(math.fabs(x), y); + const v = Complex(f64).init(@fabs(x), y); const r = ldexp_cexp(v, -1); return Complex(f64).init(r.re * math.copysign(f64, 1, x), r.im); } // x >= 1455: result always overflows else { const h = 0x1p1023 * x; - return Complex(f64).init(h * math.cos(y), h * h * math.sin(y)); + return Complex(f64).init(h * @cos(y), h * h * @sin(y)); } } @@ -146,7 +146,7 @@ fn sinh64(z: Complex(f64)) Complex(f64) { if (iy >= 0x7ff00000) { return Complex(f64).init(x * x, x * (y - y)); } - return Complex(f64).init(x * math.cos(y), math.inf(f64) * math.sin(y)); + return Complex(f64).init(x * @cos(y), math.inf(f64) * @sin(y)); 
} return Complex(f64).init((x * x) * (y - y), (x + x) * (y - y)); diff --git a/lib/std/math/complex/sqrt.zig b/lib/std/math/complex/sqrt.zig index 4f16e631b836..ab24e2d60dec 100644 --- a/lib/std/math/complex/sqrt.zig +++ b/lib/std/math/complex/sqrt.zig @@ -43,7 +43,7 @@ fn sqrt32(z: Complex(f32)) Complex(f32) { // sqrt(-inf + i nan) = nan +- inf i // sqrt(-inf + iy) = 0 + inf i if (math.signbit(x)) { - return Complex(f32).init(math.fabs(x - y), math.copysign(f32, x, y)); + return Complex(f32).init(@fabs(x - y), math.copysign(f32, x, y)); } else { return Complex(f32).init(x, math.copysign(f32, y - y, y)); } @@ -56,15 +56,15 @@ fn sqrt32(z: Complex(f32)) Complex(f32) { const dy = @as(f64, y); if (dx >= 0) { - const t = math.sqrt((dx + math.hypot(f64, dx, dy)) * 0.5); + const t = @sqrt((dx + math.hypot(f64, dx, dy)) * 0.5); return Complex(f32).init( @floatCast(f32, t), @floatCast(f32, dy / (2.0 * t)), ); } else { - const t = math.sqrt((-dx + math.hypot(f64, dx, dy)) * 0.5); + const t = @sqrt((-dx + math.hypot(f64, dx, dy)) * 0.5); return Complex(f32).init( - @floatCast(f32, math.fabs(y) / (2.0 * t)), + @floatCast(f32, @fabs(y) / (2.0 * t)), @floatCast(f32, math.copysign(f64, t, y)), ); } @@ -94,7 +94,7 @@ fn sqrt64(z: Complex(f64)) Complex(f64) { // sqrt(-inf + i nan) = nan +- inf i // sqrt(-inf + iy) = 0 + inf i if (math.signbit(x)) { - return Complex(f64).init(math.fabs(x - y), math.copysign(f64, x, y)); + return Complex(f64).init(@fabs(x - y), math.copysign(f64, x, y)); } else { return Complex(f64).init(x, math.copysign(f64, y - y, y)); } @@ -104,7 +104,7 @@ fn sqrt64(z: Complex(f64)) Complex(f64) { // scale to avoid overflow var scale = false; - if (math.fabs(x) >= threshold or math.fabs(y) >= threshold) { + if (@fabs(x) >= threshold or @fabs(y) >= threshold) { x *= 0.25; y *= 0.25; scale = true; @@ -112,11 +112,11 @@ fn sqrt64(z: Complex(f64)) Complex(f64) { var result: Complex(f64) = undefined; if (x >= 0) { - const t = math.sqrt((x + math.hypot(f64, x, y)) * 0.5); + const t = @sqrt((x + math.hypot(f64, x, y)) * 0.5); result = Complex(f64).init(t, y / (2.0 * t)); } else { - const t = math.sqrt((-x + math.hypot(f64, x, y)) * 0.5); - result = Complex(f64).init(math.fabs(y) / (2.0 * t), math.copysign(f64, t, y)); + const t = @sqrt((-x + math.hypot(f64, x, y)) * 0.5); + result = Complex(f64).init(@fabs(y) / (2.0 * t), math.copysign(f64, t, y)); } if (scale) { diff --git a/lib/std/math/complex/tanh.zig b/lib/std/math/complex/tanh.zig index 0960c66679a8..2ed2cb960975 100644 --- a/lib/std/math/complex/tanh.zig +++ b/lib/std/math/complex/tanh.zig @@ -33,7 +33,7 @@ fn tanh32(z: Complex(f32)) Complex(f32) { return Complex(f32).init(x, r); } const xx = @bitCast(f32, hx - 0x40000000); - const r = if (math.isInf(y)) y else math.sin(y) * math.cos(y); + const r = if (math.isInf(y)) y else @sin(y) * @cos(y); return Complex(f32).init(xx, math.copysign(f32, 0, r)); } @@ -44,15 +44,15 @@ fn tanh32(z: Complex(f32)) Complex(f32) { // x >= 11 if (ix >= 0x41300000) { - const exp_mx = math.exp(-math.fabs(x)); - return Complex(f32).init(math.copysign(f32, 1, x), 4 * math.sin(y) * math.cos(y) * exp_mx * exp_mx); + const exp_mx = @exp(-@fabs(x)); + return Complex(f32).init(math.copysign(f32, 1, x), 4 * @sin(y) * @cos(y) * exp_mx * exp_mx); } // Kahan's algorithm - const t = math.tan(y); + const t = @tan(y); const beta = 1.0 + t * t; const s = math.sinh(x); - const rho = math.sqrt(1 + s * s); + const rho = @sqrt(1 + s * s); const den = 1 + beta * s * s; return Complex(f32).init((beta * rho * s) / den, t / den); @@ 
-76,7 +76,7 @@ fn tanh64(z: Complex(f64)) Complex(f64) { } const xx = @bitCast(f64, (@as(u64, hx - 0x40000000) << 32) | lx); - const r = if (math.isInf(y)) y else math.sin(y) * math.cos(y); + const r = if (math.isInf(y)) y else @sin(y) * @cos(y); return Complex(f64).init(xx, math.copysign(f64, 0, r)); } @@ -87,15 +87,15 @@ fn tanh64(z: Complex(f64)) Complex(f64) { // x >= 22 if (ix >= 0x40360000) { - const exp_mx = math.exp(-math.fabs(x)); - return Complex(f64).init(math.copysign(f64, 1, x), 4 * math.sin(y) * math.cos(y) * exp_mx * exp_mx); + const exp_mx = @exp(-@fabs(x)); + return Complex(f64).init(math.copysign(f64, 1, x), 4 * @sin(y) * @cos(y) * exp_mx * exp_mx); } // Kahan's algorithm - const t = math.tan(y); + const t = @tan(y); const beta = 1.0 + t * t; const s = math.sinh(x); - const rho = math.sqrt(1 + s * s); + const rho = @sqrt(1 + s * s); const den = 1 + beta * s * s; return Complex(f64).init((beta * rho * s) / den, t / den); diff --git a/lib/std/math/cos.zig b/lib/std/math/cos.zig deleted file mode 100644 index 22bae0daeefc..000000000000 --- a/lib/std/math/cos.zig +++ /dev/null @@ -1,154 +0,0 @@ -// Ported from musl, which is licensed under the MIT license: -// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT -// -// https://git.musl-libc.org/cgit/musl/tree/src/math/cosf.c -// https://git.musl-libc.org/cgit/musl/tree/src/math/cos.c - -const std = @import("../std.zig"); -const math = std.math; -const expect = std.testing.expect; - -const kernel = @import("__trig.zig"); -const __rem_pio2 = @import("__rem_pio2.zig").__rem_pio2; -const __rem_pio2f = @import("__rem_pio2f.zig").__rem_pio2f; - -/// Returns the cosine of the radian value x. -/// -/// Special Cases: -/// - cos(+-inf) = nan -/// - cos(nan) = nan -pub fn cos(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - return switch (T) { - f32 => cos32(x), - f64 => cos64(x), - else => @compileError("cos not implemented for " ++ @typeName(T)), - }; -} - -fn cos32(x: f32) f32 { - // Small multiples of pi/2 rounded to double precision. 
- const c1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18 - const c2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18 - const c3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2 - const c4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18 - - var ix = @bitCast(u32, x); - const sign = ix >> 31 != 0; - ix &= 0x7fffffff; - - if (ix <= 0x3f490fda) { // |x| ~<= pi/4 - if (ix < 0x39800000) { // |x| < 2**-12 - // raise inexact if x != 0 - math.doNotOptimizeAway(x + 0x1p120); - return 1.0; - } - return kernel.__cosdf(x); - } - if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4 - if (ix > 0x4016cbe3) { // |x| ~> 3*pi/4 - return -kernel.__cosdf(if (sign) x + c2pio2 else x - c2pio2); - } else { - if (sign) { - return kernel.__sindf(x + c1pio2); - } else { - return kernel.__sindf(c1pio2 - x); - } - } - } - if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4 - if (ix > 0x40afeddf) { // |x| ~> 7*pi/4 - return kernel.__cosdf(if (sign) x + c4pio2 else x - c4pio2); - } else { - if (sign) { - return kernel.__sindf(-x - c3pio2); - } else { - return kernel.__sindf(x - c3pio2); - } - } - } - - // cos(Inf or NaN) is NaN - if (ix >= 0x7f800000) { - return x - x; - } - - var y: f64 = undefined; - const n = __rem_pio2f(x, &y); - return switch (n & 3) { - 0 => kernel.__cosdf(y), - 1 => kernel.__sindf(-y), - 2 => -kernel.__cosdf(y), - else => kernel.__sindf(y), - }; -} - -fn cos64(x: f64) f64 { - var ix = @bitCast(u64, x) >> 32; - ix &= 0x7fffffff; - - // |x| ~< pi/4 - if (ix <= 0x3fe921fb) { - if (ix < 0x3e46a09e) { // |x| < 2**-27 * sqrt(2) - // raise inexact if x!=0 - math.doNotOptimizeAway(x + 0x1p120); - return 1.0; - } - return kernel.__cos(x, 0); - } - - // cos(Inf or NaN) is NaN - if (ix >= 0x7ff00000) { - return x - x; - } - - var y: [2]f64 = undefined; - const n = __rem_pio2(x, &y); - return switch (n & 3) { - 0 => kernel.__cos(y[0], y[1]), - 1 => -kernel.__sin(y[0], y[1], 1), - 2 => -kernel.__cos(y[0], y[1]), - else => kernel.__sin(y[0], y[1], 1), - }; -} - -test "math.cos" { - try expect(cos(@as(f32, 0.0)) == cos32(0.0)); - try expect(cos(@as(f64, 0.0)) == cos64(0.0)); -} - -test "math.cos32" { - const epsilon = 0.00001; - - try expect(math.approxEqAbs(f32, cos32(0.0), 1.0, epsilon)); - try expect(math.approxEqAbs(f32, cos32(0.2), 0.980067, epsilon)); - try expect(math.approxEqAbs(f32, cos32(0.8923), 0.627623, epsilon)); - try expect(math.approxEqAbs(f32, cos32(1.5), 0.070737, epsilon)); - try expect(math.approxEqAbs(f32, cos32(-1.5), 0.070737, epsilon)); - try expect(math.approxEqAbs(f32, cos32(37.45), 0.969132, epsilon)); - try expect(math.approxEqAbs(f32, cos32(89.123), 0.400798, epsilon)); -} - -test "math.cos64" { - const epsilon = 0.000001; - - try expect(math.approxEqAbs(f64, cos64(0.0), 1.0, epsilon)); - try expect(math.approxEqAbs(f64, cos64(0.2), 0.980067, epsilon)); - try expect(math.approxEqAbs(f64, cos64(0.8923), 0.627623, epsilon)); - try expect(math.approxEqAbs(f64, cos64(1.5), 0.070737, epsilon)); - try expect(math.approxEqAbs(f64, cos64(-1.5), 0.070737, epsilon)); - try expect(math.approxEqAbs(f64, cos64(37.45), 0.969132, epsilon)); - try expect(math.approxEqAbs(f64, cos64(89.123), 0.40080, epsilon)); -} - -test "math.cos32.special" { - try expect(math.isNan(cos32(math.inf(f32)))); - try expect(math.isNan(cos32(-math.inf(f32)))); - try expect(math.isNan(cos32(math.nan(f32)))); -} - -test "math.cos64.special" { - try expect(math.isNan(cos64(math.inf(f64)))); - try expect(math.isNan(cos64(-math.inf(f64)))); - try expect(math.isNan(cos64(math.nan(f64)))); -} diff --git 
a/lib/std/math/cosh.zig b/lib/std/math/cosh.zig index c71e82ea1c71..d633f2fa0c64 100644 --- a/lib/std/math/cosh.zig +++ b/lib/std/math/cosh.zig @@ -45,7 +45,7 @@ fn cosh32(x: f32) f32 { // |x| < log(FLT_MAX) if (ux < 0x42B17217) { - const t = math.exp(ax); + const t = @exp(ax); return 0.5 * (t + 1 / t); } @@ -77,7 +77,7 @@ fn cosh64(x: f64) f64 { // |x| < log(DBL_MAX) if (w < 0x40862E42) { - const t = math.exp(ax); + const t = @exp(ax); // NOTE: If x > log(0x1p26) then 1/t is not required. return 0.5 * (t + 1 / t); } diff --git a/lib/std/math/expo2.zig b/lib/std/math/expo2.zig index f404570fb692..4345233173cf 100644 --- a/lib/std/math/expo2.zig +++ b/lib/std/math/expo2.zig @@ -22,7 +22,7 @@ fn expo2f(x: f32) f32 { const u = (0x7F + k / 2) << 23; const scale = @bitCast(f32, u); - return math.exp(x - kln2) * scale * scale; + return @exp(x - kln2) * scale * scale; } fn expo2d(x: f64) f64 { @@ -31,5 +31,5 @@ fn expo2d(x: f64) f64 { const u = (0x3FF + k / 2) << 20; const scale = @bitCast(f64, @as(u64, u) << 32); - return math.exp(x - kln2) * scale * scale; + return @exp(x - kln2) * scale * scale; } diff --git a/lib/std/math/fabs.zig b/lib/std/math/fabs.zig deleted file mode 100644 index 44918e75d91c..000000000000 --- a/lib/std/math/fabs.zig +++ /dev/null @@ -1,45 +0,0 @@ -const std = @import("../std.zig"); -const math = std.math; -const expect = std.testing.expect; - -/// Returns the absolute value of x. -/// -/// Special Cases: -/// - fabs(+-inf) = +inf -/// - fabs(nan) = nan -pub fn fabs(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - const TBits = std.meta.Int(.unsigned, @bitSizeOf(T)); - if (@typeInfo(T) != .Float) { - @compileError("fabs not implemented for " ++ @typeName(T)); - } - - const float_bits = @bitCast(TBits, x); - const remove_sign = ~@as(TBits, 0) >> 1; - - return @bitCast(T, float_bits & remove_sign); -} - -test "math.fabs" { - // TODO add support for c_longdouble here - inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| { - // normals - try expect(fabs(@as(T, 1.0)) == 1.0); - try expect(fabs(@as(T, -1.0)) == 1.0); - try expect(fabs(math.floatMin(T)) == math.floatMin(T)); - try expect(fabs(-math.floatMin(T)) == math.floatMin(T)); - try expect(fabs(math.floatMax(T)) == math.floatMax(T)); - try expect(fabs(-math.floatMax(T)) == math.floatMax(T)); - - // subnormals - try expect(fabs(@as(T, 0.0)) == 0.0); - try expect(fabs(@as(T, -0.0)) == 0.0); - try expect(fabs(math.floatTrueMin(T)) == math.floatTrueMin(T)); - try expect(fabs(-math.floatTrueMin(T)) == math.floatTrueMin(T)); - - // non-finite numbers - try expect(math.isPositiveInf(fabs(math.inf(T)))); - try expect(math.isPositiveInf(fabs(-math.inf(T)))); - try expect(math.isNan(fabs(math.nan(T)))); - } -} diff --git a/lib/std/math/hypot.zig b/lib/std/math/hypot.zig index e47a1918928d..981f6143feb5 100644 --- a/lib/std/math/hypot.zig +++ b/lib/std/math/hypot.zig @@ -56,7 +56,7 @@ fn hypot32(x: f32, y: f32) f32 { yy *= 0x1.0p-90; } - return z * math.sqrt(@floatCast(f32, @as(f64, x) * x + @as(f64, y) * y)); + return z * @sqrt(@floatCast(f32, @as(f64, x) * x + @as(f64, y) * y)); } fn sq(hi: *f64, lo: *f64, x: f64) void { @@ -117,7 +117,7 @@ fn hypot64(x: f64, y: f64) f64 { sq(&hx, &lx, x); sq(&hy, &ly, y); - return z * math.sqrt(ly + lx + hy + hx); + return z * @sqrt(ly + lx + hy + hx); } test "math.hypot" { diff --git a/lib/std/math/ln.zig b/lib/std/math/ln.zig index bb352cd6e1bf..65db861587d8 100644 --- a/lib/std/math/ln.zig +++ b/lib/std/math/ln.zig @@ -1,12 +1,6 @@ -// Ported from musl, which is licensed under the MIT 
license: -// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT -// -// https://git.musl-libc.org/cgit/musl/tree/src/math/lnf.c -// https://git.musl-libc.org/cgit/musl/tree/src/math/ln.c - const std = @import("../std.zig"); const math = std.math; -const expect = std.testing.expect; +const testing = std.testing; /// Returns the natural logarithm of x. /// @@ -15,175 +9,26 @@ const expect = std.testing.expect; /// - ln(0) = -inf /// - ln(x) = nan if x < 0 /// - ln(nan) = nan +/// TODO remove this in favor of `@log`. pub fn ln(x: anytype) @TypeOf(x) { const T = @TypeOf(x); switch (@typeInfo(T)) { .ComptimeFloat => { - return @as(comptime_float, ln_64(x)); - }, - .Float => { - return switch (T) { - f32 => ln_32(x), - f64 => ln_64(x), - else => @compileError("ln not implemented for " ++ @typeName(T)), - }; + return @as(comptime_float, @log(x)); }, + .Float => return @log(x), .ComptimeInt => { - return @as(comptime_int, math.floor(ln_64(@as(f64, x)))); + return @as(comptime_int, @floor(@log(@as(f64, x)))); }, .Int => |IntType| switch (IntType.signedness) { .signed => @compileError("ln not implemented for signed integers"), - .unsigned => return @as(T, math.floor(ln_64(@as(f64, x)))), + .unsigned => return @as(T, @floor(@log(@as(f64, x)))), }, else => @compileError("ln not implemented for " ++ @typeName(T)), } } -pub fn ln_32(x_: f32) f32 { - const ln2_hi: f32 = 6.9313812256e-01; - const ln2_lo: f32 = 9.0580006145e-06; - const Lg1: f32 = 0xaaaaaa.0p-24; - const Lg2: f32 = 0xccce13.0p-25; - const Lg3: f32 = 0x91e9ee.0p-25; - const Lg4: f32 = 0xf89e26.0p-26; - - var x = x_; - var ix = @bitCast(u32, x); - var k: i32 = 0; - - // x < 2^(-126) - if (ix < 0x00800000 or ix >> 31 != 0) { - // log(+-0) = -inf - if (ix << 1 == 0) { - return -math.inf(f32); - } - // log(-#) = nan - if (ix >> 31 != 0) { - return math.nan(f32); - } - - // subnormal, scale x - k -= 25; - x *= 0x1.0p25; - ix = @bitCast(u32, x); - } else if (ix >= 0x7F800000) { - return x; - } else if (ix == 0x3F800000) { - return 0; - } - - // x into [sqrt(2) / 2, sqrt(2)] - ix += 0x3F800000 - 0x3F3504F3; - k += @intCast(i32, ix >> 23) - 0x7F; - ix = (ix & 0x007FFFFF) + 0x3F3504F3; - x = @bitCast(f32, ix); - - const f = x - 1.0; - const s = f / (2.0 + f); - const z = s * s; - const w = z * z; - const t1 = w * (Lg2 + w * Lg4); - const t2 = z * (Lg1 + w * Lg3); - const R = t2 + t1; - const hfsq = 0.5 * f * f; - const dk = @intToFloat(f32, k); - - return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi; -} - -pub fn ln_64(x_: f64) f64 { - const ln2_hi: f64 = 6.93147180369123816490e-01; - const ln2_lo: f64 = 1.90821492927058770002e-10; - const Lg1: f64 = 6.666666666666735130e-01; - const Lg2: f64 = 3.999999999940941908e-01; - const Lg3: f64 = 2.857142874366239149e-01; - const Lg4: f64 = 2.222219843214978396e-01; - const Lg5: f64 = 1.818357216161805012e-01; - const Lg6: f64 = 1.531383769920937332e-01; - const Lg7: f64 = 1.479819860511658591e-01; - - var x = x_; - var ix = @bitCast(u64, x); - var hx = @intCast(u32, ix >> 32); - var k: i32 = 0; - - if (hx < 0x00100000 or hx >> 31 != 0) { - // log(+-0) = -inf - if (ix << 1 == 0) { - return -math.inf(f64); - } - // log(-#) = nan - if (hx >> 31 != 0) { - return math.nan(f64); - } - - // subnormal, scale x - k -= 54; - x *= 0x1.0p54; - hx = @intCast(u32, @bitCast(u64, ix) >> 32); - } else if (hx >= 0x7FF00000) { - return x; - } else if (hx == 0x3FF00000 and ix << 32 == 0) { - return 0; - } - - // x into [sqrt(2) / 2, sqrt(2)] - hx += 0x3FF00000 - 0x3FE6A09E; - k += @intCast(i32, hx >> 20) - 0x3FF; - 
hx = (hx & 0x000FFFFF) + 0x3FE6A09E; - ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF); - x = @bitCast(f64, ix); - - const f = x - 1.0; - const hfsq = 0.5 * f * f; - const s = f / (2.0 + f); - const z = s * s; - const w = z * z; - const t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); - const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); - const R = t2 + t1; - const dk = @intToFloat(f64, k); - - return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi; -} - test "math.ln" { - try expect(ln(@as(f32, 0.2)) == ln_32(0.2)); - try expect(ln(@as(f64, 0.2)) == ln_64(0.2)); -} - -test "math.ln32" { - const epsilon = 0.000001; - - try expect(math.approxEqAbs(f32, ln_32(0.2), -1.609438, epsilon)); - try expect(math.approxEqAbs(f32, ln_32(0.8923), -0.113953, epsilon)); - try expect(math.approxEqAbs(f32, ln_32(1.5), 0.405465, epsilon)); - try expect(math.approxEqAbs(f32, ln_32(37.45), 3.623007, epsilon)); - try expect(math.approxEqAbs(f32, ln_32(89.123), 4.490017, epsilon)); - try expect(math.approxEqAbs(f32, ln_32(123123.234375), 11.720941, epsilon)); -} - -test "math.ln64" { - const epsilon = 0.000001; - - try expect(math.approxEqAbs(f64, ln_64(0.2), -1.609438, epsilon)); - try expect(math.approxEqAbs(f64, ln_64(0.8923), -0.113953, epsilon)); - try expect(math.approxEqAbs(f64, ln_64(1.5), 0.405465, epsilon)); - try expect(math.approxEqAbs(f64, ln_64(37.45), 3.623007, epsilon)); - try expect(math.approxEqAbs(f64, ln_64(89.123), 4.490017, epsilon)); - try expect(math.approxEqAbs(f64, ln_64(123123.234375), 11.720941, epsilon)); -} - -test "math.ln32.special" { - try expect(math.isPositiveInf(ln_32(math.inf(f32)))); - try expect(math.isNegativeInf(ln_32(0.0))); - try expect(math.isNan(ln_32(-1.0))); - try expect(math.isNan(ln_32(math.nan(f32)))); -} - -test "math.ln64.special" { - try expect(math.isPositiveInf(ln_64(math.inf(f64)))); - try expect(math.isNegativeInf(ln_64(0.0))); - try expect(math.isNan(ln_64(-1.0))); - try expect(math.isNan(ln_64(math.nan(f64)))); + try testing.expect(ln(@as(f32, 0.2)) == @log(0.2)); + try testing.expect(ln(@as(f64, 0.2)) == @log(0.2)); } diff --git a/lib/std/math/log.zig b/lib/std/math/log.zig index 6336726b39ce..ad2763fa5412 100644 --- a/lib/std/math/log.zig +++ b/lib/std/math/log.zig @@ -15,28 +15,28 @@ pub fn log(comptime T: type, base: T, x: T) T { } else if (base == 10) { return math.log10(x); } else if ((@typeInfo(T) == .Float or @typeInfo(T) == .ComptimeFloat) and base == math.e) { - return math.ln(x); + return @log(x); } const float_base = math.lossyCast(f64, base); switch (@typeInfo(T)) { .ComptimeFloat => { - return @as(comptime_float, math.ln(@as(f64, x)) / math.ln(float_base)); + return @as(comptime_float, @log(@as(f64, x)) / @log(float_base)); }, .ComptimeInt => { - return @as(comptime_int, math.floor(math.ln(@as(f64, x)) / math.ln(float_base))); + return @as(comptime_int, @floor(@log(@as(f64, x)) / @log(float_base))); }, // TODO implement integer log without using float math .Int => |IntType| switch (IntType.signedness) { .signed => @compileError("log not implemented for signed integers"), - .unsigned => return @floatToInt(T, math.floor(math.ln(@intToFloat(f64, x)) / math.ln(float_base))), + .unsigned => return @floatToInt(T, @floor(@log(@intToFloat(f64, x)) / @log(float_base))), }, .Float => { switch (T) { - f32 => return @floatCast(f32, math.ln(@as(f64, x)) / math.ln(float_base)), - f64 => return math.ln(x) / math.ln(float_base), + f32 => return @floatCast(f32, @log(@as(f64, x)) / @log(float_base)), + f64 => return @log(x) / @log(float_base), else => 
@compileError("log not implemented for " ++ @typeName(T)), } }, diff --git a/lib/std/math/log10.zig b/lib/std/math/log10.zig index 84eced85f0c6..4f1342607957 100644 --- a/lib/std/math/log10.zig +++ b/lib/std/math/log10.zig @@ -1,9 +1,3 @@ -// Ported from musl, which is licensed under the MIT license: -// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT -// -// https://git.musl-libc.org/cgit/musl/tree/src/math/log10f.c -// https://git.musl-libc.org/cgit/musl/tree/src/math/log10.c - const std = @import("../std.zig"); const math = std.math; const testing = std.testing; @@ -20,198 +14,16 @@ pub fn log10(x: anytype) @TypeOf(x) { const T = @TypeOf(x); switch (@typeInfo(T)) { .ComptimeFloat => { - return @as(comptime_float, log10_64(x)); - }, - .Float => { - return switch (T) { - f32 => log10_32(x), - f64 => log10_64(x), - else => @compileError("log10 not implemented for " ++ @typeName(T)), - }; + return @as(comptime_float, @log10(x)); }, + .Float => return @log10(x), .ComptimeInt => { - return @as(comptime_int, math.floor(log10_64(@as(f64, x)))); + return @as(comptime_int, @floor(@log10(@as(f64, x)))); }, .Int => |IntType| switch (IntType.signedness) { .signed => @compileError("log10 not implemented for signed integers"), - .unsigned => return @floatToInt(T, math.floor(log10_64(@intToFloat(f64, x)))), + .unsigned => return @floatToInt(T, @floor(@log10(@intToFloat(f64, x)))), }, else => @compileError("log10 not implemented for " ++ @typeName(T)), } } - -pub fn log10_32(x_: f32) f32 { - const ivln10hi: f32 = 4.3432617188e-01; - const ivln10lo: f32 = -3.1689971365e-05; - const log10_2hi: f32 = 3.0102920532e-01; - const log10_2lo: f32 = 7.9034151668e-07; - const Lg1: f32 = 0xaaaaaa.0p-24; - const Lg2: f32 = 0xccce13.0p-25; - const Lg3: f32 = 0x91e9ee.0p-25; - const Lg4: f32 = 0xf89e26.0p-26; - - var x = x_; - var u = @bitCast(u32, x); - var ix = u; - var k: i32 = 0; - - // x < 2^(-126) - if (ix < 0x00800000 or ix >> 31 != 0) { - // log(+-0) = -inf - if (ix << 1 == 0) { - return -math.inf(f32); - } - // log(-#) = nan - if (ix >> 31 != 0) { - return math.nan(f32); - } - - k -= 25; - x *= 0x1.0p25; - ix = @bitCast(u32, x); - } else if (ix >= 0x7F800000) { - return x; - } else if (ix == 0x3F800000) { - return 0; - } - - // x into [sqrt(2) / 2, sqrt(2)] - ix += 0x3F800000 - 0x3F3504F3; - k += @intCast(i32, ix >> 23) - 0x7F; - ix = (ix & 0x007FFFFF) + 0x3F3504F3; - x = @bitCast(f32, ix); - - const f = x - 1.0; - const s = f / (2.0 + f); - const z = s * s; - const w = z * z; - const t1 = w * (Lg2 + w * Lg4); - const t2 = z * (Lg1 + w * Lg3); - const R = t2 + t1; - const hfsq = 0.5 * f * f; - - var hi = f - hfsq; - u = @bitCast(u32, hi); - u &= 0xFFFFF000; - hi = @bitCast(f32, u); - const lo = f - hi - hfsq + s * (hfsq + R); - const dk = @intToFloat(f32, k); - - return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi; -} - -pub fn log10_64(x_: f64) f64 { - const ivln10hi: f64 = 4.34294481878168880939e-01; - const ivln10lo: f64 = 2.50829467116452752298e-11; - const log10_2hi: f64 = 3.01029995663611771306e-01; - const log10_2lo: f64 = 3.69423907715893078616e-13; - const Lg1: f64 = 6.666666666666735130e-01; - const Lg2: f64 = 3.999999999940941908e-01; - const Lg3: f64 = 2.857142874366239149e-01; - const Lg4: f64 = 2.222219843214978396e-01; - const Lg5: f64 = 1.818357216161805012e-01; - const Lg6: f64 = 1.531383769920937332e-01; - const Lg7: f64 = 1.479819860511658591e-01; - - var x = x_; - var ix = @bitCast(u64, x); - var hx = @intCast(u32, ix >> 32); - var k: i32 = 0; - 
- if (hx < 0x00100000 or hx >> 31 != 0) { - // log(+-0) = -inf - if (ix << 1 == 0) { - return -math.inf(f32); - } - // log(-#) = nan - if (hx >> 31 != 0) { - return math.nan(f32); - } - - // subnormal, scale x - k -= 54; - x *= 0x1.0p54; - hx = @intCast(u32, @bitCast(u64, x) >> 32); - } else if (hx >= 0x7FF00000) { - return x; - } else if (hx == 0x3FF00000 and ix << 32 == 0) { - return 0; - } - - // x into [sqrt(2) / 2, sqrt(2)] - hx += 0x3FF00000 - 0x3FE6A09E; - k += @intCast(i32, hx >> 20) - 0x3FF; - hx = (hx & 0x000FFFFF) + 0x3FE6A09E; - ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF); - x = @bitCast(f64, ix); - - const f = x - 1.0; - const hfsq = 0.5 * f * f; - const s = f / (2.0 + f); - const z = s * s; - const w = z * z; - const t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); - const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); - const R = t2 + t1; - - // hi + lo = f - hfsq + s * (hfsq + R) ~ log(1 + f) - var hi = f - hfsq; - var hii = @bitCast(u64, hi); - hii &= @as(u64, maxInt(u64)) << 32; - hi = @bitCast(f64, hii); - const lo = f - hi - hfsq + s * (hfsq + R); - - // val_hi + val_lo ~ log10(1 + f) + k * log10(2) - var val_hi = hi * ivln10hi; - const dk = @intToFloat(f64, k); - const y = dk * log10_2hi; - var val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi; - - // Extra precision multiplication - const ww = y + val_hi; - val_lo += (y - ww) + val_hi; - val_hi = ww; - - return val_lo + val_hi; -} - -test "math.log10" { - try testing.expect(log10(@as(f32, 0.2)) == log10_32(0.2)); - try testing.expect(log10(@as(f64, 0.2)) == log10_64(0.2)); -} - -test "math.log10_32" { - const epsilon = 0.000001; - - try testing.expect(math.approxEqAbs(f32, log10_32(0.2), -0.698970, epsilon)); - try testing.expect(math.approxEqAbs(f32, log10_32(0.8923), -0.049489, epsilon)); - try testing.expect(math.approxEqAbs(f32, log10_32(1.5), 0.176091, epsilon)); - try testing.expect(math.approxEqAbs(f32, log10_32(37.45), 1.573452, epsilon)); - try testing.expect(math.approxEqAbs(f32, log10_32(89.123), 1.94999, epsilon)); - try testing.expect(math.approxEqAbs(f32, log10_32(123123.234375), 5.09034, epsilon)); -} - -test "math.log10_64" { - const epsilon = 0.000001; - - try testing.expect(math.approxEqAbs(f64, log10_64(0.2), -0.698970, epsilon)); - try testing.expect(math.approxEqAbs(f64, log10_64(0.8923), -0.049489, epsilon)); - try testing.expect(math.approxEqAbs(f64, log10_64(1.5), 0.176091, epsilon)); - try testing.expect(math.approxEqAbs(f64, log10_64(37.45), 1.573452, epsilon)); - try testing.expect(math.approxEqAbs(f64, log10_64(89.123), 1.94999, epsilon)); - try testing.expect(math.approxEqAbs(f64, log10_64(123123.234375), 5.09034, epsilon)); -} - -test "math.log10_32.special" { - try testing.expect(math.isPositiveInf(log10_32(math.inf(f32)))); - try testing.expect(math.isNegativeInf(log10_32(0.0))); - try testing.expect(math.isNan(log10_32(-1.0))); - try testing.expect(math.isNan(log10_32(math.nan(f32)))); -} - -test "math.log10_64.special" { - try testing.expect(math.isPositiveInf(log10_64(math.inf(f64)))); - try testing.expect(math.isNegativeInf(log10_64(0.0))); - try testing.expect(math.isNan(log10_64(-1.0))); - try testing.expect(math.isNan(log10_64(math.nan(f64)))); -} diff --git a/lib/std/math/log2.zig b/lib/std/math/log2.zig index 556c16f5cf95..c83b170208c0 100644 --- a/lib/std/math/log2.zig +++ b/lib/std/math/log2.zig @@ -1,13 +1,6 @@ -// Ported from musl, which is licensed under the MIT license: -// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT -// -// 
https://git.musl-libc.org/cgit/musl/tree/src/math/log2f.c -// https://git.musl-libc.org/cgit/musl/tree/src/math/log2.c - const std = @import("../std.zig"); const math = std.math; const expect = std.testing.expect; -const maxInt = std.math.maxInt; /// Returns the base-2 logarithm of x. /// @@ -20,15 +13,9 @@ pub fn log2(x: anytype) @TypeOf(x) { const T = @TypeOf(x); switch (@typeInfo(T)) { .ComptimeFloat => { - return @as(comptime_float, log2_64(x)); - }, - .Float => { - return switch (T) { - f32 => log2_32(x), - f64 => log2_64(x), - else => @compileError("log2 not implemented for " ++ @typeName(T)), - }; + return @as(comptime_float, @log2(x)); }, + .Float => return @log2(x), .ComptimeInt => comptime { var result = 0; var x_shifted = x; @@ -46,168 +33,7 @@ pub fn log2(x: anytype) @TypeOf(x) { } } -pub fn log2_32(x_: f32) f32 { - const ivln2hi: f32 = 1.4428710938e+00; - const ivln2lo: f32 = -1.7605285393e-04; - const Lg1: f32 = 0xaaaaaa.0p-24; - const Lg2: f32 = 0xccce13.0p-25; - const Lg3: f32 = 0x91e9ee.0p-25; - const Lg4: f32 = 0xf89e26.0p-26; - - var x = x_; - var u = @bitCast(u32, x); - var ix = u; - var k: i32 = 0; - - // x < 2^(-126) - if (ix < 0x00800000 or ix >> 31 != 0) { - // log(+-0) = -inf - if (ix << 1 == 0) { - return -math.inf(f32); - } - // log(-#) = nan - if (ix >> 31 != 0) { - return math.nan(f32); - } - - k -= 25; - x *= 0x1.0p25; - ix = @bitCast(u32, x); - } else if (ix >= 0x7F800000) { - return x; - } else if (ix == 0x3F800000) { - return 0; - } - - // x into [sqrt(2) / 2, sqrt(2)] - ix += 0x3F800000 - 0x3F3504F3; - k += @intCast(i32, ix >> 23) - 0x7F; - ix = (ix & 0x007FFFFF) + 0x3F3504F3; - x = @bitCast(f32, ix); - - const f = x - 1.0; - const s = f / (2.0 + f); - const z = s * s; - const w = z * z; - const t1 = w * (Lg2 + w * Lg4); - const t2 = z * (Lg1 + w * Lg3); - const R = t2 + t1; - const hfsq = 0.5 * f * f; - - var hi = f - hfsq; - u = @bitCast(u32, hi); - u &= 0xFFFFF000; - hi = @bitCast(f32, u); - const lo = f - hi - hfsq + s * (hfsq + R); - return (lo + hi) * ivln2lo + lo * ivln2hi + hi * ivln2hi + @intToFloat(f32, k); -} - -pub fn log2_64(x_: f64) f64 { - const ivln2hi: f64 = 1.44269504072144627571e+00; - const ivln2lo: f64 = 1.67517131648865118353e-10; - const Lg1: f64 = 6.666666666666735130e-01; - const Lg2: f64 = 3.999999999940941908e-01; - const Lg3: f64 = 2.857142874366239149e-01; - const Lg4: f64 = 2.222219843214978396e-01; - const Lg5: f64 = 1.818357216161805012e-01; - const Lg6: f64 = 1.531383769920937332e-01; - const Lg7: f64 = 1.479819860511658591e-01; - - var x = x_; - var ix = @bitCast(u64, x); - var hx = @intCast(u32, ix >> 32); - var k: i32 = 0; - - if (hx < 0x00100000 or hx >> 31 != 0) { - // log(+-0) = -inf - if (ix << 1 == 0) { - return -math.inf(f64); - } - // log(-#) = nan - if (hx >> 31 != 0) { - return math.nan(f64); - } - - // subnormal, scale x - k -= 54; - x *= 0x1.0p54; - hx = @intCast(u32, @bitCast(u64, x) >> 32); - } else if (hx >= 0x7FF00000) { - return x; - } else if (hx == 0x3FF00000 and ix << 32 == 0) { - return 0; - } - - // x into [sqrt(2) / 2, sqrt(2)] - hx += 0x3FF00000 - 0x3FE6A09E; - k += @intCast(i32, hx >> 20) - 0x3FF; - hx = (hx & 0x000FFFFF) + 0x3FE6A09E; - ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF); - x = @bitCast(f64, ix); - - const f = x - 1.0; - const hfsq = 0.5 * f * f; - const s = f / (2.0 + f); - const z = s * s; - const w = z * z; - const t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); - const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); - const R = t2 + t1; - - // hi + lo = f - hfsq + s * (hfsq + R) ~ log(1 + 
f) - var hi = f - hfsq; - var hii = @bitCast(u64, hi); - hii &= @as(u64, maxInt(u64)) << 32; - hi = @bitCast(f64, hii); - const lo = f - hi - hfsq + s * (hfsq + R); - - var val_hi = hi * ivln2hi; - var val_lo = (lo + hi) * ivln2lo + lo * ivln2hi; - - // spadd(val_hi, val_lo, y) - const y = @intToFloat(f64, k); - const ww = y + val_hi; - val_lo += (y - ww) + val_hi; - val_hi = ww; - - return val_lo + val_hi; -} - -test "math.log2" { - try expect(log2(@as(f32, 0.2)) == log2_32(0.2)); - try expect(log2(@as(f64, 0.2)) == log2_64(0.2)); -} - -test "math.log2_32" { - const epsilon = 0.000001; - - try expect(math.approxEqAbs(f32, log2_32(0.2), -2.321928, epsilon)); - try expect(math.approxEqAbs(f32, log2_32(0.8923), -0.164399, epsilon)); - try expect(math.approxEqAbs(f32, log2_32(1.5), 0.584962, epsilon)); - try expect(math.approxEqAbs(f32, log2_32(37.45), 5.226894, epsilon)); - try expect(math.approxEqAbs(f32, log2_32(123123.234375), 16.909744, epsilon)); -} - -test "math.log2_64" { - const epsilon = 0.000001; - - try expect(math.approxEqAbs(f64, log2_64(0.2), -2.321928, epsilon)); - try expect(math.approxEqAbs(f64, log2_64(0.8923), -0.164399, epsilon)); - try expect(math.approxEqAbs(f64, log2_64(1.5), 0.584962, epsilon)); - try expect(math.approxEqAbs(f64, log2_64(37.45), 5.226894, epsilon)); - try expect(math.approxEqAbs(f64, log2_64(123123.234375), 16.909744, epsilon)); -} - -test "math.log2_32.special" { - try expect(math.isPositiveInf(log2_32(math.inf(f32)))); - try expect(math.isNegativeInf(log2_32(0.0))); - try expect(math.isNan(log2_32(-1.0))); - try expect(math.isNan(log2_32(math.nan(f32)))); -} - -test "math.log2_64.special" { - try expect(math.isPositiveInf(log2_64(math.inf(f64)))); - try expect(math.isNegativeInf(log2_64(0.0))); - try expect(math.isNan(log2_64(-1.0))); - try expect(math.isNan(log2_64(math.nan(f64)))); +test "log2" { + try expect(log2(@as(f32, 0.2)) == @log2(0.2)); + try expect(log2(@as(f64, 0.2)) == @log2(0.2)); } diff --git a/lib/std/math/nan.zig b/lib/std/math/nan.zig index 634af1f0d6df..8a279372429e 100644 --- a/lib/std/math/nan.zig +++ b/lib/std/math/nan.zig @@ -1,27 +1,20 @@ const math = @import("../math.zig"); /// Returns the nan representation for type T. -pub fn nan(comptime T: type) T { - return switch (T) { - f16 => math.nan_f16, - f32 => math.nan_f32, - f64 => math.nan_f64, - f80 => math.nan_f80, - f128 => math.nan_f128, - else => @compileError("nan not implemented for " ++ @typeName(T)), +pub inline fn nan(comptime T: type) T { + return switch (@typeInfo(T).Float.bits) { + 16 => math.nan_f16, + 32 => math.nan_f32, + 64 => math.nan_f64, + 80 => math.nan_f80, + 128 => math.nan_f128, + else => @compileError("unreachable"), }; } /// Returns the signalling nan representation for type T. -pub fn snan(comptime T: type) T { - // Note: A signalling nan is identical to a standard right now by may have a different bit - // representation in the future when required. - return switch (T) { - f16 => @bitCast(f16, math.nan_u16), - f32 => @bitCast(f32, math.nan_u32), - f64 => @bitCast(f64, math.nan_u64), - f80 => @bitCast(f80, math.nan_u80), - f128 => @bitCast(f128, math.nan_u128), - else => @compileError("snan not implemented for " ++ @typeName(T)), - }; +/// Note: A signalling nan is identical to a standard nan right now but may have a different bit +/// representation in the future when required.
+pub inline fn snan(comptime T: type) T { + return nan(T); } diff --git a/lib/std/math/pow.zig b/lib/std/math/pow.zig index 040abf9a4484..48c6636926bc 100644 --- a/lib/std/math/pow.zig +++ b/lib/std/math/pow.zig @@ -82,7 +82,7 @@ pub fn pow(comptime T: type, x: T, y: T) T { } // pow(x, +inf) = +0 for |x| < 1 // pow(x, -inf) = +0 for |x| > 1 - else if ((math.fabs(x) < 1) == math.isPositiveInf(y)) { + else if ((@fabs(x) < 1) == math.isPositiveInf(y)) { return 0; } // pow(x, -inf) = +inf for |x| < 1 @@ -108,14 +108,14 @@ pub fn pow(comptime T: type, x: T, y: T) T { // special case sqrt if (y == 0.5) { - return math.sqrt(x); + return @sqrt(x); } if (y == -0.5) { - return 1 / math.sqrt(x); + return 1 / @sqrt(x); } - const r1 = math.modf(math.fabs(y)); + const r1 = math.modf(@fabs(y)); var yi = r1.ipart; var yf = r1.fpart; @@ -123,7 +123,7 @@ pub fn pow(comptime T: type, x: T, y: T) T { return math.nan(T); } if (yi >= 1 << (@typeInfo(T).Float.bits - 1)) { - return math.exp(y * math.ln(x)); + return @exp(y * @log(x)); } // a = a1 * 2^ae @@ -136,7 +136,7 @@ pub fn pow(comptime T: type, x: T, y: T) T { yf -= 1; yi += 1; } - a1 = math.exp(yf * math.ln(x)); + a1 = @exp(yf * @log(x)); } // a *= x^yi diff --git a/lib/std/math/round.zig b/lib/std/math/round.zig deleted file mode 100644 index be33a9cfbd27..000000000000 --- a/lib/std/math/round.zig +++ /dev/null @@ -1,185 +0,0 @@ -// Ported from musl, which is licensed under the MIT license: -// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT -// -// https://git.musl-libc.org/cgit/musl/tree/src/math/roundf.c -// https://git.musl-libc.org/cgit/musl/tree/src/math/round.c - -const expect = std.testing.expect; -const std = @import("../std.zig"); -const math = std.math; - -/// Returns x rounded to the nearest integer, rounding half away from zero. 
-/// -/// Special Cases: -/// - round(+-0) = +-0 -/// - round(+-inf) = +-inf -/// - round(nan) = nan -pub fn round(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - return switch (T) { - f32 => round32(x), - f64 => round64(x), - f128 => round128(x), - - // TODO this is not correct for some targets - c_longdouble => @floatCast(c_longdouble, round128(x)), - - else => @compileError("round not implemented for " ++ @typeName(T)), - }; -} - -fn round32(x_: f32) f32 { - const f32_toint = 1.0 / math.floatEps(f32); - - var x = x_; - const u = @bitCast(u32, x); - const e = (u >> 23) & 0xFF; - var y: f32 = undefined; - - if (e >= 0x7F + 23) { - return x; - } - if (u >> 31 != 0) { - x = -x; - } - if (e < 0x7F - 1) { - math.doNotOptimizeAway(x + f32_toint); - return 0 * @bitCast(f32, u); - } - - y = x + f32_toint - f32_toint - x; - if (y > 0.5) { - y = y + x - 1; - } else if (y <= -0.5) { - y = y + x + 1; - } else { - y = y + x; - } - - if (u >> 31 != 0) { - return -y; - } else { - return y; - } -} - -fn round64(x_: f64) f64 { - const f64_toint = 1.0 / math.floatEps(f64); - - var x = x_; - const u = @bitCast(u64, x); - const e = (u >> 52) & 0x7FF; - var y: f64 = undefined; - - if (e >= 0x3FF + 52) { - return x; - } - if (u >> 63 != 0) { - x = -x; - } - if (e < 0x3ff - 1) { - math.doNotOptimizeAway(x + f64_toint); - return 0 * @bitCast(f64, u); - } - - y = x + f64_toint - f64_toint - x; - if (y > 0.5) { - y = y + x - 1; - } else if (y <= -0.5) { - y = y + x + 1; - } else { - y = y + x; - } - - if (u >> 63 != 0) { - return -y; - } else { - return y; - } -} - -fn round128(x_: f128) f128 { - const f128_toint = 1.0 / math.floatEps(f128); - - var x = x_; - const u = @bitCast(u128, x); - const e = (u >> 112) & 0x7FFF; - var y: f128 = undefined; - - if (e >= 0x3FFF + 112) { - return x; - } - if (u >> 127 != 0) { - x = -x; - } - if (e < 0x3FFF - 1) { - math.doNotOptimizeAway(x + f128_toint); - return 0 * @bitCast(f128, u); - } - - y = x + f128_toint - f128_toint - x; - if (y > 0.5) { - y = y + x - 1; - } else if (y <= -0.5) { - y = y + x + 1; - } else { - y = y + x; - } - - if (u >> 127 != 0) { - return -y; - } else { - return y; - } -} - -test "math.round" { - try expect(round(@as(f32, 1.3)) == round32(1.3)); - try expect(round(@as(f64, 1.3)) == round64(1.3)); - try expect(round(@as(f128, 1.3)) == round128(1.3)); -} - -test "math.round32" { - try expect(round32(1.3) == 1.0); - try expect(round32(-1.3) == -1.0); - try expect(round32(0.2) == 0.0); - try expect(round32(1.8) == 2.0); -} - -test "math.round64" { - try expect(round64(1.3) == 1.0); - try expect(round64(-1.3) == -1.0); - try expect(round64(0.2) == 0.0); - try expect(round64(1.8) == 2.0); -} - -test "math.round128" { - try expect(round128(1.3) == 1.0); - try expect(round128(-1.3) == -1.0); - try expect(round128(0.2) == 0.0); - try expect(round128(1.8) == 2.0); -} - -test "math.round32.special" { - try expect(round32(0.0) == 0.0); - try expect(round32(-0.0) == -0.0); - try expect(math.isPositiveInf(round32(math.inf(f32)))); - try expect(math.isNegativeInf(round32(-math.inf(f32)))); - try expect(math.isNan(round32(math.nan(f32)))); -} - -test "math.round64.special" { - try expect(round64(0.0) == 0.0); - try expect(round64(-0.0) == -0.0); - try expect(math.isPositiveInf(round64(math.inf(f64)))); - try expect(math.isNegativeInf(round64(-math.inf(f64)))); - try expect(math.isNan(round64(math.nan(f64)))); -} - -test "math.round128.special" { - try expect(round128(0.0) == 0.0); - try expect(round128(-0.0) == -0.0); - try 
expect(math.isPositiveInf(round128(math.inf(f128)))); - try expect(math.isNegativeInf(round128(-math.inf(f128)))); - try expect(math.isNan(round128(math.nan(f128)))); -} diff --git a/lib/std/math/sin.zig b/lib/std/math/sin.zig deleted file mode 100644 index cf663b1d9ed0..000000000000 --- a/lib/std/math/sin.zig +++ /dev/null @@ -1,168 +0,0 @@ -// Ported from musl, which is licensed under the MIT license: -// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT -// -// https://git.musl-libc.org/cgit/musl/tree/src/math/sinf.c -// https://git.musl-libc.org/cgit/musl/tree/src/math/sin.c -// -const std = @import("../std.zig"); -const math = std.math; -const expect = std.testing.expect; - -const kernel = @import("__trig.zig"); -const __rem_pio2 = @import("__rem_pio2.zig").__rem_pio2; -const __rem_pio2f = @import("__rem_pio2f.zig").__rem_pio2f; - -/// Returns the sine of the radian value x. -/// -/// Special Cases: -/// - sin(+-0) = +-0 -/// - sin(+-inf) = nan -/// - sin(nan) = nan -pub fn sin(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - return switch (T) { - f32 => sin32(x), - f64 => sin64(x), - else => @compileError("sin not implemented for " ++ @typeName(T)), - }; -} - -fn sin32(x: f32) f32 { - // Small multiples of pi/2 rounded to double precision. - const s1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18 - const s2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18 - const s3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2 - const s4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18 - - var ix = @bitCast(u32, x); - const sign = ix >> 31 != 0; - ix &= 0x7fffffff; - - if (ix <= 0x3f490fda) { // |x| ~<= pi/4 - if (ix < 0x39800000) { // |x| < 2**-12 - // raise inexact if x!=0 and underflow if subnormal - math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120); - return x; - } - return kernel.__sindf(x); - } - if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4 - if (ix <= 0x4016cbe3) { // |x| ~<= 3pi/4 - if (sign) { - return -kernel.__cosdf(x + s1pio2); - } else { - return kernel.__cosdf(x - s1pio2); - } - } - return kernel.__sindf(if (sign) -(x + s2pio2) else -(x - s2pio2)); - } - if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4 - if (ix <= 0x40afeddf) { // |x| ~<= 7*pi/4 - if (sign) { - return kernel.__cosdf(x + s3pio2); - } else { - return -kernel.__cosdf(x - s3pio2); - } - } - return kernel.__sindf(if (sign) x + s4pio2 else x - s4pio2); - } - - // sin(Inf or NaN) is NaN - if (ix >= 0x7f800000) { - return x - x; - } - - var y: f64 = undefined; - const n = __rem_pio2f(x, &y); - return switch (n & 3) { - 0 => kernel.__sindf(y), - 1 => kernel.__cosdf(y), - 2 => kernel.__sindf(-y), - else => -kernel.__cosdf(y), - }; -} - -fn sin64(x: f64) f64 { - var ix = @bitCast(u64, x) >> 32; - ix &= 0x7fffffff; - - // |x| ~< pi/4 - if (ix <= 0x3fe921fb) { - if (ix < 0x3e500000) { // |x| < 2**-26 - // raise inexact if x != 0 and underflow if subnormal - math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120); - return x; - } - return kernel.__sin(x, 0.0, 0); - } - - // sin(Inf or NaN) is NaN - if (ix >= 0x7ff00000) { - return x - x; - } - - var y: [2]f64 = undefined; - const n = __rem_pio2(x, &y); - return switch (n & 3) { - 0 => kernel.__sin(y[0], y[1], 1), - 1 => kernel.__cos(y[0], y[1]), - 2 => -kernel.__sin(y[0], y[1], 1), - else => -kernel.__cos(y[0], y[1]), - }; -} - -test "math.sin" { - try expect(sin(@as(f32, 0.0)) == sin32(0.0)); - try expect(sin(@as(f64, 0.0)) == sin64(0.0)); - try expect(comptime (math.sin(@as(f64, 2))) == math.sin(@as(f64, 
2))); -} - -test "math.sin32" { - const epsilon = 0.00001; - - try expect(math.approxEqAbs(f32, sin32(0.0), 0.0, epsilon)); - try expect(math.approxEqAbs(f32, sin32(0.2), 0.198669, epsilon)); - try expect(math.approxEqAbs(f32, sin32(0.8923), 0.778517, epsilon)); - try expect(math.approxEqAbs(f32, sin32(1.5), 0.997495, epsilon)); - try expect(math.approxEqAbs(f32, sin32(-1.5), -0.997495, epsilon)); - try expect(math.approxEqAbs(f32, sin32(37.45), -0.246544, epsilon)); - try expect(math.approxEqAbs(f32, sin32(89.123), 0.916166, epsilon)); -} - -test "math.sin64" { - const epsilon = 0.000001; - - try expect(math.approxEqAbs(f64, sin64(0.0), 0.0, epsilon)); - try expect(math.approxEqAbs(f64, sin64(0.2), 0.198669, epsilon)); - try expect(math.approxEqAbs(f64, sin64(0.8923), 0.778517, epsilon)); - try expect(math.approxEqAbs(f64, sin64(1.5), 0.997495, epsilon)); - try expect(math.approxEqAbs(f64, sin64(-1.5), -0.997495, epsilon)); - try expect(math.approxEqAbs(f64, sin64(37.45), -0.246543, epsilon)); - try expect(math.approxEqAbs(f64, sin64(89.123), 0.916166, epsilon)); -} - -test "math.sin32.special" { - try expect(sin32(0.0) == 0.0); - try expect(sin32(-0.0) == -0.0); - try expect(math.isNan(sin32(math.inf(f32)))); - try expect(math.isNan(sin32(-math.inf(f32)))); - try expect(math.isNan(sin32(math.nan(f32)))); -} - -test "math.sin64.special" { - try expect(sin64(0.0) == 0.0); - try expect(sin64(-0.0) == -0.0); - try expect(math.isNan(sin64(math.inf(f64)))); - try expect(math.isNan(sin64(-math.inf(f64)))); - try expect(math.isNan(sin64(math.nan(f64)))); -} - -test "math.sin32 #9901" { - const float = @bitCast(f32, @as(u32, 0b11100011111111110000000000000000)); - _ = std.math.sin(float); -} - -test "math.sin64 #9901" { - const float = @bitCast(f64, @as(u64, 0b1111111101000001000000001111110111111111100000000000000000000001)); - _ = std.math.sin(float); -} diff --git a/lib/std/math/trunc.zig b/lib/std/math/trunc.zig deleted file mode 100644 index 32bd7fb0aac6..000000000000 --- a/lib/std/math/trunc.zig +++ /dev/null @@ -1,141 +0,0 @@ -// Ported from musl, which is licensed under the MIT license: -// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT -// -// https://git.musl-libc.org/cgit/musl/tree/src/math/truncf.c -// https://git.musl-libc.org/cgit/musl/tree/src/math/trunc.c - -const std = @import("../std.zig"); -const math = std.math; -const expect = std.testing.expect; -const maxInt = std.math.maxInt; - -/// Returns the integer value of x. 
-/// -/// Special Cases: -/// - trunc(+-0) = +-0 -/// - trunc(+-inf) = +-inf -/// - trunc(nan) = nan -pub fn trunc(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - return switch (T) { - f32 => trunc32(x), - f64 => trunc64(x), - f128 => trunc128(x), - - // TODO this is not correct for some targets - c_longdouble => @floatCast(c_longdouble, trunc128(x)), - - else => @compileError("trunc not implemented for " ++ @typeName(T)), - }; -} - -fn trunc32(x: f32) f32 { - const u = @bitCast(u32, x); - var e = @intCast(i32, ((u >> 23) & 0xFF)) - 0x7F + 9; - var m: u32 = undefined; - - if (e >= 23 + 9) { - return x; - } - if (e < 9) { - e = 1; - } - - m = @as(u32, maxInt(u32)) >> @intCast(u5, e); - if (u & m == 0) { - return x; - } else { - math.doNotOptimizeAway(x + 0x1p120); - return @bitCast(f32, u & ~m); - } -} - -fn trunc64(x: f64) f64 { - const u = @bitCast(u64, x); - var e = @intCast(i32, ((u >> 52) & 0x7FF)) - 0x3FF + 12; - var m: u64 = undefined; - - if (e >= 52 + 12) { - return x; - } - if (e < 12) { - e = 1; - } - - m = @as(u64, maxInt(u64)) >> @intCast(u6, e); - if (u & m == 0) { - return x; - } else { - math.doNotOptimizeAway(x + 0x1p120); - return @bitCast(f64, u & ~m); - } -} - -fn trunc128(x: f128) f128 { - const u = @bitCast(u128, x); - var e = @intCast(i32, ((u >> 112) & 0x7FFF)) - 0x3FFF + 16; - var m: u128 = undefined; - - if (e >= 112 + 16) { - return x; - } - if (e < 16) { - e = 1; - } - - m = @as(u128, maxInt(u128)) >> @intCast(u7, e); - if (u & m == 0) { - return x; - } else { - math.doNotOptimizeAway(x + 0x1p120); - return @bitCast(f128, u & ~m); - } -} - -test "math.trunc" { - try expect(trunc(@as(f32, 1.3)) == trunc32(1.3)); - try expect(trunc(@as(f64, 1.3)) == trunc64(1.3)); - try expect(trunc(@as(f128, 1.3)) == trunc128(1.3)); -} - -test "math.trunc32" { - try expect(trunc32(1.3) == 1.0); - try expect(trunc32(-1.3) == -1.0); - try expect(trunc32(0.2) == 0.0); -} - -test "math.trunc64" { - try expect(trunc64(1.3) == 1.0); - try expect(trunc64(-1.3) == -1.0); - try expect(trunc64(0.2) == 0.0); -} - -test "math.trunc128" { - try expect(trunc128(1.3) == 1.0); - try expect(trunc128(-1.3) == -1.0); - try expect(trunc128(0.2) == 0.0); -} - -test "math.trunc32.special" { - try expect(trunc32(0.0) == 0.0); // 0x3F800000 - try expect(trunc32(-0.0) == -0.0); - try expect(math.isPositiveInf(trunc32(math.inf(f32)))); - try expect(math.isNegativeInf(trunc32(-math.inf(f32)))); - try expect(math.isNan(trunc32(math.nan(f32)))); -} - -test "math.trunc64.special" { - try expect(trunc64(0.0) == 0.0); - try expect(trunc64(-0.0) == -0.0); - try expect(math.isPositiveInf(trunc64(math.inf(f64)))); - try expect(math.isNegativeInf(trunc64(-math.inf(f64)))); - try expect(math.isNan(trunc64(math.nan(f64)))); -} - -test "math.trunc128.special" { - try expect(trunc128(0.0) == 0.0); - try expect(trunc128(-0.0) == -0.0); - try expect(math.isPositiveInf(trunc128(math.inf(f128)))); - try expect(math.isNegativeInf(trunc128(-math.inf(f128)))); - try expect(math.isNan(trunc128(math.nan(f128)))); -} diff --git a/lib/std/rand/ziggurat.zig b/lib/std/rand/ziggurat.zig index 5c18d0023b1b..b05ce7fd7351 100644 --- a/lib/std/rand/ziggurat.zig +++ b/lib/std/rand/ziggurat.zig @@ -33,7 +33,7 @@ pub fn next_f64(random: Random, comptime tables: ZigTable) f64 { }; const x = u * tables.x[i]; - const test_x = if (tables.is_symmetric) math.fabs(x) else x; + const test_x = if (tables.is_symmetric) @fabs(x) else x; // equivalent to |u| < tables.x[i+1] / tables.x[i] (or u < tables.x[i+1] / tables.x[i]) if (test_x < tables.x[i + 
1]) { @@ -106,18 +106,18 @@ const norm_r = 3.6541528853610088; const norm_v = 0.00492867323399; fn norm_f(x: f64) f64 { - return math.exp(-x * x / 2.0); + return @exp(-x * x / 2.0); } fn norm_f_inv(y: f64) f64 { - return math.sqrt(-2.0 * math.ln(y)); + return @sqrt(-2.0 * @log(y)); } fn norm_zero_case(random: Random, u: f64) f64 { var x: f64 = 1; var y: f64 = 0; while (-2.0 * y < x * x) { - x = math.ln(random.float(f64)) / norm_r; - y = math.ln(random.float(f64)); + x = @log(random.float(f64)) / norm_r; + y = @log(random.float(f64)); } if (u < 0) { @@ -151,13 +151,13 @@ const exp_r = 7.69711747013104972; const exp_v = 0.0039496598225815571993; fn exp_f(x: f64) f64 { - return math.exp(-x); + return @exp(-x); } fn exp_f_inv(y: f64) f64 { - return -math.ln(y); + return -@log(y); } fn exp_zero_case(random: Random, _: f64) f64 { - return exp_r - math.ln(random.float(f64)); + return exp_r - @log(random.float(f64)); } test "exp dist sanity" { diff --git a/lib/std/special/c.zig b/lib/std/special/c.zig index dfc20203342f..525bdd267de7 100644 --- a/lib/std/special/c.zig +++ b/lib/std/special/c.zig @@ -12,7 +12,6 @@ const maxInt = std.math.maxInt; const native_os = builtin.os.tag; const native_arch = builtin.cpu.arch; const native_abi = builtin.abi; -const long_double_is_f128 = builtin.target.longDoubleIs(f128); const is_wasm = switch (native_arch) { .wasm32, .wasm64 => true, @@ -55,53 +54,6 @@ comptime { } else if (is_msvc) { @export(_fltused, .{ .name = "_fltused", .linkage = .Strong }); } - - @export(trunc, .{ .name = "trunc", .linkage = .Strong }); - @export(truncf, .{ .name = "truncf", .linkage = .Strong }); - @export(truncl, .{ .name = "truncl", .linkage = .Strong }); - - @export(log, .{ .name = "log", .linkage = .Strong }); - @export(logf, .{ .name = "logf", .linkage = .Strong }); - - @export(sin, .{ .name = "sin", .linkage = .Strong }); - @export(sinf, .{ .name = "sinf", .linkage = .Strong }); - - @export(cos, .{ .name = "cos", .linkage = .Strong }); - @export(cosf, .{ .name = "cosf", .linkage = .Strong }); - - @export(exp, .{ .name = "exp", .linkage = .Strong }); - @export(expf, .{ .name = "expf", .linkage = .Strong }); - - @export(exp2, .{ .name = "exp2", .linkage = .Strong }); - @export(exp2f, .{ .name = "exp2f", .linkage = .Strong }); - - @export(log2, .{ .name = "log2", .linkage = .Strong }); - @export(log2f, .{ .name = "log2f", .linkage = .Strong }); - - @export(log10, .{ .name = "log10", .linkage = .Strong }); - @export(log10f, .{ .name = "log10f", .linkage = .Strong }); - - @export(fmod, .{ .name = "fmod", .linkage = .Strong }); - @export(fmodf, .{ .name = "fmodf", .linkage = .Strong }); - - @export(sincos, .{ .name = "sincos", .linkage = .Strong }); - @export(sincosf, .{ .name = "sincosf", .linkage = .Strong }); - - @export(fabs, .{ .name = "fabs", .linkage = .Strong }); - @export(fabsf, .{ .name = "fabsf", .linkage = .Strong }); - - @export(round, .{ .name = "round", .linkage = .Strong }); - @export(roundf, .{ .name = "roundf", .linkage = .Strong }); - @export(roundl, .{ .name = "roundl", .linkage = .Strong }); - - @export(fmin, .{ .name = "fmin", .linkage = .Strong }); - @export(fminf, .{ .name = "fminf", .linkage = .Strong }); - - @export(fmax, .{ .name = "fmax", .linkage = .Strong }); - @export(fmaxf, .{ .name = "fmaxf", .linkage = .Strong }); - - @export(sqrt, .{ .name = "sqrt", .linkage = .Strong }); - @export(sqrtf, .{ .name = "sqrtf", .linkage = .Strong }); } // Avoid dragging in the runtime safety mechanisms into this .o file, @@ -352,549 +304,6 @@ test "strncmp" { try 
std.testing.expect(strncmp("\xff", "\x02", 1) == 253); } -fn trunc(a: f64) callconv(.C) f64 { - return math.trunc(a); -} - -fn truncf(a: f32) callconv(.C) f32 { - return math.trunc(a); -} - -fn truncl(a: c_longdouble) callconv(.C) c_longdouble { - if (!long_double_is_f128) { - @panic("TODO implement this"); - } - return math.trunc(a); -} - -fn log(a: f64) callconv(.C) f64 { - return math.ln(a); -} - -fn logf(a: f32) callconv(.C) f32 { - return math.ln(a); -} - -fn sin(a: f64) callconv(.C) f64 { - return math.sin(a); -} - -fn sinf(a: f32) callconv(.C) f32 { - return math.sin(a); -} - -fn cos(a: f64) callconv(.C) f64 { - return math.cos(a); -} - -fn cosf(a: f32) callconv(.C) f32 { - return math.cos(a); -} - -fn exp(a: f64) callconv(.C) f64 { - return math.exp(a); -} - -fn expf(a: f32) callconv(.C) f32 { - return math.exp(a); -} - -fn exp2(a: f64) callconv(.C) f64 { - return math.exp2(a); -} - -fn exp2f(a: f32) callconv(.C) f32 { - return math.exp2(a); -} - -fn log2(a: f64) callconv(.C) f64 { - return math.log2(a); -} - -fn log2f(a: f32) callconv(.C) f32 { - return math.log2(a); -} - -fn log10(a: f64) callconv(.C) f64 { - return math.log10(a); -} - -fn log10f(a: f32) callconv(.C) f32 { - return math.log10(a); -} - -fn fmodf(x: f32, y: f32) callconv(.C) f32 { - return generic_fmod(f32, x, y); -} -fn fmod(x: f64, y: f64) callconv(.C) f64 { - return generic_fmod(f64, x, y); -} - -fn generic_fmod(comptime T: type, x: T, y: T) T { - @setRuntimeSafety(false); - - const bits = @typeInfo(T).Float.bits; - const uint = std.meta.Int(.unsigned, bits); - const log2uint = math.Log2Int(uint); - const digits = if (T == f32) 23 else 52; - const exp_bits = if (T == f32) 9 else 12; - const bits_minus_1 = bits - 1; - const mask = if (T == f32) 0xff else 0x7ff; - var ux = @bitCast(uint, x); - var uy = @bitCast(uint, y); - var ex = @intCast(i32, (ux >> digits) & mask); - var ey = @intCast(i32, (uy >> digits) & mask); - const sx = if (T == f32) @intCast(u32, ux & 0x80000000) else @intCast(i32, ux >> bits_minus_1); - var i: uint = undefined; - - if (uy << 1 == 0 or isNan(@bitCast(T, uy)) or ex == mask) - return (x * y) / (x * y); - - if (ux << 1 <= uy << 1) { - if (ux << 1 == uy << 1) - return 0 * x; - return x; - } - - // normalize x and y - if (ex == 0) { - i = ux << exp_bits; - while (i >> bits_minus_1 == 0) : ({ - ex -= 1; - i <<= 1; - }) {} - ux <<= @intCast(log2uint, @bitCast(u32, -ex + 1)); - } else { - ux &= maxInt(uint) >> exp_bits; - ux |= 1 << digits; - } - if (ey == 0) { - i = uy << exp_bits; - while (i >> bits_minus_1 == 0) : ({ - ey -= 1; - i <<= 1; - }) {} - uy <<= @intCast(log2uint, @bitCast(u32, -ey + 1)); - } else { - uy &= maxInt(uint) >> exp_bits; - uy |= 1 << digits; - } - - // x mod y - while (ex > ey) : (ex -= 1) { - i = ux -% uy; - if (i >> bits_minus_1 == 0) { - if (i == 0) - return 0 * x; - ux = i; - } - ux <<= 1; - } - i = ux -% uy; - if (i >> bits_minus_1 == 0) { - if (i == 0) - return 0 * x; - ux = i; - } - while (ux >> digits == 0) : ({ - ux <<= 1; - ex -= 1; - }) {} - - // scale result up - if (ex > 0) { - ux -%= 1 << digits; - ux |= @as(uint, @bitCast(u32, ex)) << digits; - } else { - ux >>= @intCast(log2uint, @bitCast(u32, -ex + 1)); - } - if (T == f32) { - ux |= sx; - } else { - ux |= @intCast(uint, sx) << bits_minus_1; - } - return @bitCast(T, ux); -} - -test "fmod, fmodf" { - inline for ([_]type{ f32, f64 }) |T| { - const nan_val = math.nan(T); - const inf_val = math.inf(T); - - try std.testing.expect(isNan(generic_fmod(T, nan_val, 1.0))); - try 
std.testing.expect(isNan(generic_fmod(T, 1.0, nan_val))); - try std.testing.expect(isNan(generic_fmod(T, inf_val, 1.0))); - try std.testing.expect(isNan(generic_fmod(T, 0.0, 0.0))); - try std.testing.expect(isNan(generic_fmod(T, 1.0, 0.0))); - - try std.testing.expectEqual(@as(T, 0.0), generic_fmod(T, 0.0, 2.0)); - try std.testing.expectEqual(@as(T, -0.0), generic_fmod(T, -0.0, 2.0)); - - try std.testing.expectEqual(@as(T, -2.0), generic_fmod(T, -32.0, 10.0)); - try std.testing.expectEqual(@as(T, -2.0), generic_fmod(T, -32.0, -10.0)); - try std.testing.expectEqual(@as(T, 2.0), generic_fmod(T, 32.0, 10.0)); - try std.testing.expectEqual(@as(T, 2.0), generic_fmod(T, 32.0, -10.0)); - } -} - -fn sincos(a: f64, r_sin: *f64, r_cos: *f64) callconv(.C) void { - r_sin.* = math.sin(a); - r_cos.* = math.cos(a); -} - -fn sincosf(a: f32, r_sin: *f32, r_cos: *f32) callconv(.C) void { - r_sin.* = math.sin(a); - r_cos.* = math.cos(a); -} - -fn fabs(a: f64) callconv(.C) f64 { - return math.fabs(a); -} - -fn fabsf(a: f32) callconv(.C) f32 { - return math.fabs(a); -} - -fn roundf(a: f32) callconv(.C) f32 { - return math.round(a); -} - -fn round(a: f64) callconv(.C) f64 { - return math.round(a); -} - -fn roundl(a: c_longdouble) callconv(.C) c_longdouble { - if (!long_double_is_f128) { - @panic("TODO implement this"); - } - return math.round(a); -} - -fn fminf(x: f32, y: f32) callconv(.C) f32 { - return generic_fmin(f32, x, y); -} - -fn fmin(x: f64, y: f64) callconv(.C) f64 { - return generic_fmin(f64, x, y); -} - -fn generic_fmin(comptime T: type, x: T, y: T) T { - if (isNan(x)) - return y; - if (isNan(y)) - return x; - return if (x < y) x else y; -} - -test "fmin, fminf" { - inline for ([_]type{ f32, f64 }) |T| { - const nan_val = math.nan(T); - - try std.testing.expect(isNan(generic_fmin(T, nan_val, nan_val))); - try std.testing.expectEqual(@as(T, 1.0), generic_fmin(T, nan_val, 1.0)); - try std.testing.expectEqual(@as(T, 1.0), generic_fmin(T, 1.0, nan_val)); - - try std.testing.expectEqual(@as(T, 1.0), generic_fmin(T, 1.0, 10.0)); - try std.testing.expectEqual(@as(T, -1.0), generic_fmin(T, 1.0, -1.0)); - } -} - -fn fmaxf(x: f32, y: f32) callconv(.C) f32 { - return generic_fmax(f32, x, y); -} - -fn fmax(x: f64, y: f64) callconv(.C) f64 { - return generic_fmax(f64, x, y); -} - -fn generic_fmax(comptime T: type, x: T, y: T) T { - if (isNan(x)) - return y; - if (isNan(y)) - return x; - return if (x < y) y else x; -} - -test "fmax, fmaxf" { - inline for ([_]type{ f32, f64 }) |T| { - const nan_val = math.nan(T); - - try std.testing.expect(isNan(generic_fmax(T, nan_val, nan_val))); - try std.testing.expectEqual(@as(T, 1.0), generic_fmax(T, nan_val, 1.0)); - try std.testing.expectEqual(@as(T, 1.0), generic_fmax(T, 1.0, nan_val)); - - try std.testing.expectEqual(@as(T, 10.0), generic_fmax(T, 1.0, 10.0)); - try std.testing.expectEqual(@as(T, 1.0), generic_fmax(T, 1.0, -1.0)); - } -} - -// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound -// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are -// potentially some edge cases remaining that are not handled in the same way. 
-fn sqrt(x: f64) callconv(.C) f64 { - const tiny: f64 = 1.0e-300; - const sign: u32 = 0x80000000; - const u = @bitCast(u64, x); - - var ix0 = @intCast(u32, u >> 32); - var ix1 = @intCast(u32, u & 0xFFFFFFFF); - - // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan - if (ix0 & 0x7FF00000 == 0x7FF00000) { - return x * x + x; - } - - // sqrt(+-0) = +-0 - if (x == 0.0) { - return x; - } - // sqrt(-ve) = snan - if (ix0 & sign != 0) { - return math.snan(f64); - } - - // normalize x - var m = @intCast(i32, ix0 >> 20); - if (m == 0) { - // subnormal - while (ix0 == 0) { - m -= 21; - ix0 |= ix1 >> 11; - ix1 <<= 21; - } - - // subnormal - var i: u32 = 0; - while (ix0 & 0x00100000 == 0) : (i += 1) { - ix0 <<= 1; - } - m -= @intCast(i32, i) - 1; - ix0 |= ix1 >> @intCast(u5, 32 - i); - ix1 <<= @intCast(u5, i); - } - - // unbias exponent - m -= 1023; - ix0 = (ix0 & 0x000FFFFF) | 0x00100000; - if (m & 1 != 0) { - ix0 += ix0 + (ix1 >> 31); - ix1 = ix1 +% ix1; - } - m >>= 1; - - // sqrt(x) bit by bit - ix0 += ix0 + (ix1 >> 31); - ix1 = ix1 +% ix1; - - var q: u32 = 0; - var q1: u32 = 0; - var s0: u32 = 0; - var s1: u32 = 0; - var r: u32 = 0x00200000; - var t: u32 = undefined; - var t1: u32 = undefined; - - while (r != 0) { - t = s0 +% r; - if (t <= ix0) { - s0 = t + r; - ix0 -= t; - q += r; - } - ix0 = ix0 +% ix0 +% (ix1 >> 31); - ix1 = ix1 +% ix1; - r >>= 1; - } - - r = sign; - while (r != 0) { - t1 = s1 +% r; - t = s0; - if (t < ix0 or (t == ix0 and t1 <= ix1)) { - s1 = t1 +% r; - if (t1 & sign == sign and s1 & sign == 0) { - s0 += 1; - } - ix0 -= t; - if (ix1 < t1) { - ix0 -= 1; - } - ix1 = ix1 -% t1; - q1 += r; - } - ix0 = ix0 +% ix0 +% (ix1 >> 31); - ix1 = ix1 +% ix1; - r >>= 1; - } - - // rounding direction - if (ix0 | ix1 != 0) { - var z = 1.0 - tiny; // raise inexact - if (z >= 1.0) { - z = 1.0 + tiny; - if (q1 == 0xFFFFFFFF) { - q1 = 0; - q += 1; - } else if (z > 1.0) { - if (q1 == 0xFFFFFFFE) { - q += 1; - } - q1 += 2; - } else { - q1 += q1 & 1; - } - } - } - - ix0 = (q >> 1) + 0x3FE00000; - ix1 = q1 >> 1; - if (q & 1 != 0) { - ix1 |= 0x80000000; - } - - // NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same - // behaviour at least. - var iix0 = @intCast(i32, ix0); - iix0 = iix0 +% (m << 20); - - const uz = (@intCast(u64, iix0) << 32) | ix1; - return @bitCast(f64, uz); -} - -test "sqrt" { - const V = [_]f64{ - 0.0, - 4.089288054930154, - 7.538757127071935, - 8.97780793672623, - 5.304443821913729, - 5.682408965311888, - 0.5846878579110049, - 3.650338664297043, - 0.3178091951800732, - 7.1505232436382835, - 3.6589165881946464, - }; - - // Note that @sqrt will either generate the sqrt opcode (if supported by the - // target ISA) or a call to `sqrtf` otherwise. 
- for (V) |val| - try std.testing.expectEqual(@sqrt(val), sqrt(val)); -} - -test "sqrt special" { - try std.testing.expect(std.math.isPositiveInf(sqrt(std.math.inf(f64)))); - try std.testing.expect(sqrt(0.0) == 0.0); - try std.testing.expect(sqrt(-0.0) == -0.0); - try std.testing.expect(isNan(sqrt(-1.0))); - try std.testing.expect(isNan(sqrt(std.math.nan(f64)))); -} - -fn sqrtf(x: f32) callconv(.C) f32 { - const tiny: f32 = 1.0e-30; - const sign: i32 = @bitCast(i32, @as(u32, 0x80000000)); - var ix: i32 = @bitCast(i32, x); - - if ((ix & 0x7F800000) == 0x7F800000) { - return x * x + x; // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = snan - } - - // zero - if (ix <= 0) { - if (ix & ~sign == 0) { - return x; // sqrt (+-0) = +-0 - } - if (ix < 0) { - return math.snan(f32); - } - } - - // normalize - var m = ix >> 23; - if (m == 0) { - // subnormal - var i: i32 = 0; - while (ix & 0x00800000 == 0) : (i += 1) { - ix <<= 1; - } - m -= i - 1; - } - - m -= 127; // unbias exponent - ix = (ix & 0x007FFFFF) | 0x00800000; - - if (m & 1 != 0) { // odd m, double x to even - ix += ix; - } - - m >>= 1; // m = [m / 2] - - // sqrt(x) bit by bit - ix += ix; - var q: i32 = 0; // q = sqrt(x) - var s: i32 = 0; - var r: i32 = 0x01000000; // r = moving bit right -> left - - while (r != 0) { - const t = s + r; - if (t <= ix) { - s = t + r; - ix -= t; - q += r; - } - ix += ix; - r >>= 1; - } - - // floating add to find rounding direction - if (ix != 0) { - var z = 1.0 - tiny; // inexact - if (z >= 1.0) { - z = 1.0 + tiny; - if (z > 1.0) { - q += 2; - } else { - if (q & 1 != 0) { - q += 1; - } - } - } - } - - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - return @bitCast(f32, ix); -} - -test "sqrtf" { - const V = [_]f32{ - 0.0, - 4.089288054930154, - 7.538757127071935, - 8.97780793672623, - 5.304443821913729, - 5.682408965311888, - 0.5846878579110049, - 3.650338664297043, - 0.3178091951800732, - 7.1505232436382835, - 3.6589165881946464, - }; - - // Note that @sqrt will either generate the sqrt opcode (if supported by the - // target ISA) or a call to `sqrtf` otherwise. - for (V) |val| - try std.testing.expectEqual(@sqrt(val), sqrtf(val)); -} - -test "sqrtf special" { - try std.testing.expect(std.math.isPositiveInf(sqrtf(std.math.inf(f32)))); - try std.testing.expect(sqrtf(0.0) == 0.0); - try std.testing.expect(sqrtf(-0.0) == -0.0); - try std.testing.expect(isNan(sqrtf(-1.0))); - try std.testing.expect(isNan(sqrtf(std.math.nan(f32)))); -} - // TODO we should be able to put this directly in std/linux/x86_64.zig but // it causes a segfault in release mode. this is a workaround of calling it // across .o file boundaries. fix comptime @ptrCast of nakedcc functions. diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig index 93e0ffbe1a17..f509e584f5ce 100644 --- a/lib/std/special/compiler_rt.zig +++ b/lib/std/special/compiler_rt.zig @@ -8,6 +8,7 @@ const abi = builtin.abi; const is_gnu = abi.isGnu(); const is_mingw = os_tag == .windows and is_gnu; const is_darwin = std.Target.Os.Tag.isDarwin(os_tag); +const is_ppc = arch.isPPC() or arch.isPPC64(); const linkage = if (is_test) std.builtin.GlobalLinkage.Internal @@ -19,9 +20,6 @@ const strong_linkage = if (is_test) else std.builtin.GlobalLinkage.Strong; -const long_double_is_f80 = builtin.target.longDoubleIs(f80); -const long_double_is_f128 = builtin.target.longDoubleIs(f128); - comptime { // These files do their own comptime exporting logic. 
_ = @import("compiler_rt/atomics.zig"); @@ -726,42 +724,25 @@ comptime { @export(_aullrem, .{ .name = "\x01__aullrem", .linkage = strong_linkage }); } - if (!is_test) { - if (long_double_is_f80) { - @export(fmodx, .{ .name = "fmodl", .linkage = linkage }); - } else if (long_double_is_f128) { - @export(fmodq, .{ .name = "fmodl", .linkage = linkage }); - } else { - @export(fmodl, .{ .name = "fmodl", .linkage = linkage }); - } - if (long_double_is_f80 or builtin.zig_backend == .stage1) { - // TODO: https://github.com/ziglang/zig/issues/11161 - @export(fmodx, .{ .name = "fmodx", .linkage = linkage }); - } - @export(fmodq, .{ .name = "fmodq", .linkage = linkage }); - - @export(floorf, .{ .name = "floorf", .linkage = linkage }); - @export(floor, .{ .name = "floor", .linkage = linkage }); - @export(floorl, .{ .name = "floorl", .linkage = linkage }); - - @export(ceilf, .{ .name = "ceilf", .linkage = linkage }); - @export(ceil, .{ .name = "ceil", .linkage = linkage }); - @export(ceill, .{ .name = "ceill", .linkage = linkage }); - - @export(fma, .{ .name = "fma", .linkage = linkage }); - @export(fmaf, .{ .name = "fmaf", .linkage = linkage }); - @export(fmal, .{ .name = "fmal", .linkage = linkage }); - if (long_double_is_f80) { - @export(fmal, .{ .name = "__fmax", .linkage = linkage }); - } else { - @export(__fmax, .{ .name = "__fmax", .linkage = linkage }); - } - if (long_double_is_f128) { - @export(fmal, .{ .name = "fmaq", .linkage = linkage }); - } else { - @export(fmaq, .{ .name = "fmaq", .linkage = linkage }); - } - } + mathExport("ceil", @import("./compiler_rt/ceil.zig"), true); + mathExport("cos", @import("./compiler_rt/cos.zig"), true); + mathExport("exp", @import("./compiler_rt/exp.zig"), true); + mathExport("exp2", @import("./compiler_rt/exp2.zig"), true); + mathExport("fabs", @import("./compiler_rt/fabs.zig"), true); + mathExport("floor", @import("./compiler_rt/floor.zig"), true); + mathExport("fma", @import("./compiler_rt/fma.zig"), true); + mathExport("fmax", @import("./compiler_rt/fmax.zig"), true); + mathExport("fmin", @import("./compiler_rt/fmin.zig"), true); + mathExport("fmod", @import("./compiler_rt/fmod.zig"), true); + mathExport("log", @import("./compiler_rt/log.zig"), true); + mathExport("log10", @import("./compiler_rt/log10.zig"), true); + mathExport("log2", @import("./compiler_rt/log2.zig"), true); + mathExport("round", @import("./compiler_rt/round.zig"), true); + mathExport("sin", @import("./compiler_rt/sin.zig"), true); + mathExport("sincos", @import("./compiler_rt/sincos.zig"), true); + mathExport("sqrt", @import("./compiler_rt/sqrt.zig"), true); + mathExport("tan", @import("./compiler_rt/tan.zig"), false); + mathExport("trunc", @import("./compiler_rt/trunc.zig"), true); if (arch.isSPARC()) { // SPARC systems use a different naming scheme @@ -815,7 +796,7 @@ comptime { @export(_Qp_qtod, .{ .name = "_Qp_qtod", .linkage = linkage }); } - if ((arch.isPPC() or arch.isPPC64()) and !is_test) { + if (is_ppc and !is_test) { @export(__addtf3, .{ .name = "__addkf3", .linkage = linkage }); @export(__subtf3, .{ .name = "__subkf3", .linkage = linkage }); @export(__multf3, .{ .name = "__mulkf3", .linkage = linkage }); @@ -840,65 +821,53 @@ comptime { @export(__letf2, .{ .name = "__lekf2", .linkage = linkage }); @export(__getf2, .{ .name = "__gtkf2", .linkage = linkage }); @export(__unordtf2, .{ .name = "__unordkf2", .linkage = linkage }); - - // LLVM PPC backend lowers f128 fma to `fmaf128`. 
- @export(fmal, .{ .name = "fmaf128", .linkage = linkage }); - } -} - -const math = std.math; - -fn fmaf(a: f32, b: f32, c: f32) callconv(.C) f32 { - return math.fma(f32, a, b, c); -} -fn fma(a: f64, b: f64, c: f64) callconv(.C) f64 { - return math.fma(f64, a, b, c); -} -fn __fmax(a: f80, b: f80, c: f80) callconv(.C) f80 { - return math.fma(f80, a, b, c); -} -fn fmaq(a: f128, b: f128, c: f128) callconv(.C) f128 { - return math.fma(f128, a, b, c); -} -fn fmal(a: c_longdouble, b: c_longdouble, c: c_longdouble) callconv(.C) c_longdouble { - return math.fma(c_longdouble, a, b, c); -} - -// TODO add intrinsics for these (and probably the double version too) -// and have the math stuff use the intrinsic. same as @mod and @rem -fn floorf(x: f32) callconv(.C) f32 { - return math.floor(x); -} -fn floor(x: f64) callconv(.C) f64 { - return math.floor(x); -} -fn floorl(x: c_longdouble) callconv(.C) c_longdouble { - if (!long_double_is_f128) { - @panic("TODO implement this"); } - return math.floor(x); } -fn ceilf(x: f32) callconv(.C) f32 { - return math.ceil(x); -} -fn ceil(x: f64) callconv(.C) f64 { - return math.ceil(x); -} -fn ceill(x: c_longdouble) callconv(.C) c_longdouble { - if (!long_double_is_f128) { - @panic("TODO implement this"); +inline fn mathExport(double_name: []const u8, comptime import: type, is_standard: bool) void { + const half_name = "__" ++ double_name ++ "h"; + const half_fn = @field(import, half_name); + const float_name = double_name ++ "f"; + const float_fn = @field(import, float_name); + const double_fn = @field(import, double_name); + const long_double_name = double_name ++ "l"; + const xf80_name = "__" ++ double_name ++ "x"; + const xf80_fn = @field(import, xf80_name); + const quad_name = double_name ++ "q"; + const quad_fn = @field(import, quad_name); + + @export(half_fn, .{ .name = half_name, .linkage = linkage }); + @export(float_fn, .{ .name = float_name, .linkage = linkage }); + @export(double_fn, .{ .name = double_name, .linkage = linkage }); + @export(xf80_fn, .{ .name = xf80_name, .linkage = linkage }); + @export(quad_fn, .{ .name = quad_name, .linkage = linkage }); + + if (is_test) return; + + const pairs = .{ + .{ f16, half_fn }, + .{ f32, float_fn }, + .{ f64, double_fn }, + .{ f80, xf80_fn }, + .{ f128, quad_fn }, + }; + + // Weak aliases don't work on Windows, so we avoid exporting the `l` alias + // on this platform for functions we know will collide. + if (builtin.os.tag != .windows or !builtin.link_libc or !is_standard) { + inline for (pairs) |pair| { + const F = pair[0]; + const func = pair[1]; + if (builtin.target.longDoubleIs(F)) { + @export(func, .{ .name = long_double_name, .linkage = linkage }); + } + } } - return math.ceil(x); -} -const fmodq = @import("compiler_rt/fmodq.zig").fmodq; -const fmodx = @import("compiler_rt/fmodx.zig").fmodx; -fn fmodl(x: c_longdouble, y: c_longdouble) callconv(.C) c_longdouble { - if (!long_double_is_f128) { - @panic("TODO implement this"); + if (is_ppc and is_standard) { + // LLVM PPC backend lowers f128 ops with the suffix `f128` instead of `l`. 
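+ // For example, with double_name = "sin" the f128 implementation (`sinq`)
+ // is exported here under the additional name "sinf128".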
+ @export(quad_fn, .{ .name = double_name ++ "f128", .linkage = linkage }); } - return @floatCast(c_longdouble, fmodq(x, y)); } // Avoid dragging in the runtime safety mechanisms into this .o file, diff --git a/lib/std/math/ceil.zig b/lib/std/special/compiler_rt/ceil.zig similarity index 52% rename from lib/std/math/ceil.zig rename to lib/std/special/compiler_rt/ceil.zig index 686be8e58d32..c7087a2c3a7e 100644 --- a/lib/std/math/ceil.zig +++ b/lib/std/special/compiler_rt/ceil.zig @@ -4,31 +4,16 @@ // https://git.musl-libc.org/cgit/musl/tree/src/math/ceilf.c // https://git.musl-libc.org/cgit/musl/tree/src/math/ceil.c -const std = @import("../std.zig"); +const std = @import("std"); const math = std.math; const expect = std.testing.expect; -/// Returns the least integer value greater than of equal to x. -/// -/// Special Cases: -/// - ceil(+-0) = +-0 -/// - ceil(+-inf) = +-inf -/// - ceil(nan) = nan -pub fn ceil(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - return switch (T) { - f32 => ceil32(x), - f64 => ceil64(x), - f128 => ceil128(x), - - // TODO this is not correct for some targets - c_longdouble => @floatCast(c_longdouble, ceil128(x)), - - else => @compileError("ceil not implemented for " ++ @typeName(T)), - }; +pub fn __ceilh(x: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, ceilf(x)); } -fn ceil32(x: f32) f32 { +pub fn ceilf(x: f32) callconv(.C) f32 { var u = @bitCast(u32, x); var e = @intCast(i32, (u >> 23) & 0xFF) - 0x7F; var m: u32 = undefined; @@ -61,7 +46,7 @@ fn ceil32(x: f32) f32 { } } -fn ceil64(x: f64) f64 { +pub fn ceil(x: f64) callconv(.C) f64 { const f64_toint = 1.0 / math.floatEps(f64); const u = @bitCast(u64, x); @@ -92,7 +77,12 @@ fn ceil64(x: f64) f64 { } } -fn ceil128(x: f128) f128 { +pub fn __ceilx(x: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, ceilq(x)); +} + +pub fn ceilq(x: f128) callconv(.C) f128 { const f128_toint = 1.0 / math.floatEps(f128); const u = @bitCast(u128, x); @@ -121,50 +111,44 @@ fn ceil128(x: f128) f128 { } } -test "math.ceil" { - try expect(ceil(@as(f32, 0.0)) == ceil32(0.0)); - try expect(ceil(@as(f64, 0.0)) == ceil64(0.0)); - try expect(ceil(@as(f128, 0.0)) == ceil128(0.0)); -} - -test "math.ceil32" { - try expect(ceil32(1.3) == 2.0); - try expect(ceil32(-1.3) == -1.0); - try expect(ceil32(0.2) == 1.0); +test "ceil32" { + try expect(ceilf(1.3) == 2.0); + try expect(ceilf(-1.3) == -1.0); + try expect(ceilf(0.2) == 1.0); } -test "math.ceil64" { - try expect(ceil64(1.3) == 2.0); - try expect(ceil64(-1.3) == -1.0); - try expect(ceil64(0.2) == 1.0); +test "ceil64" { + try expect(ceil(1.3) == 2.0); + try expect(ceil(-1.3) == -1.0); + try expect(ceil(0.2) == 1.0); } -test "math.ceil128" { - try expect(ceil128(1.3) == 2.0); - try expect(ceil128(-1.3) == -1.0); - try expect(ceil128(0.2) == 1.0); +test "ceil128" { + try expect(ceilq(1.3) == 2.0); + try expect(ceilq(-1.3) == -1.0); + try expect(ceilq(0.2) == 1.0); } -test "math.ceil32.special" { - try expect(ceil32(0.0) == 0.0); - try expect(ceil32(-0.0) == -0.0); - try expect(math.isPositiveInf(ceil32(math.inf(f32)))); - try expect(math.isNegativeInf(ceil32(-math.inf(f32)))); - try expect(math.isNan(ceil32(math.nan(f32)))); +test "ceil32.special" { + try expect(ceilf(0.0) == 0.0); + try expect(ceilf(-0.0) == -0.0); + try expect(math.isPositiveInf(ceilf(math.inf(f32)))); + try expect(math.isNegativeInf(ceilf(-math.inf(f32)))); + try expect(math.isNan(ceilf(math.nan(f32)))); } -test "math.ceil64.special" { - try 
expect(ceil64(0.0) == 0.0); - try expect(ceil64(-0.0) == -0.0); - try expect(math.isPositiveInf(ceil64(math.inf(f64)))); - try expect(math.isNegativeInf(ceil64(-math.inf(f64)))); - try expect(math.isNan(ceil64(math.nan(f64)))); +test "ceil64.special" { + try expect(ceil(0.0) == 0.0); + try expect(ceil(-0.0) == -0.0); + try expect(math.isPositiveInf(ceil(math.inf(f64)))); + try expect(math.isNegativeInf(ceil(-math.inf(f64)))); + try expect(math.isNan(ceil(math.nan(f64)))); } -test "math.ceil128.special" { - try expect(ceil128(0.0) == 0.0); - try expect(ceil128(-0.0) == -0.0); - try expect(math.isPositiveInf(ceil128(math.inf(f128)))); - try expect(math.isNegativeInf(ceil128(-math.inf(f128)))); - try expect(math.isNan(ceil128(math.nan(f128)))); +test "ceil128.special" { + try expect(ceilq(0.0) == 0.0); + try expect(ceilq(-0.0) == -0.0); + try expect(math.isPositiveInf(ceilq(math.inf(f128)))); + try expect(math.isNegativeInf(ceilq(-math.inf(f128)))); + try expect(math.isNan(ceilq(math.nan(f128)))); } diff --git a/lib/std/special/compiler_rt/cos.zig b/lib/std/special/compiler_rt/cos.zig new file mode 100644 index 000000000000..957e5f9c91be --- /dev/null +++ b/lib/std/special/compiler_rt/cos.zig @@ -0,0 +1,144 @@ +const std = @import("std"); +const math = std.math; +const expect = std.testing.expect; + +const trig = @import("trig.zig"); +const rem_pio2 = @import("rem_pio2.zig").rem_pio2; +const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f; + +pub fn __cosh(a: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, cosf(a)); +} + +pub fn cosf(x: f32) callconv(.C) f32 { + // Small multiples of pi/2 rounded to double precision. + const c1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18 + const c2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18 + const c3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2 + const c4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18 + + var ix = @bitCast(u32, x); + const sign = ix >> 31 != 0; + ix &= 0x7fffffff; + + if (ix <= 0x3f490fda) { // |x| ~<= pi/4 + if (ix < 0x39800000) { // |x| < 2**-12 + // raise inexact if x != 0 + math.doNotOptimizeAway(x + 0x1p120); + return 1.0; + } + return trig.__cosdf(x); + } + if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4 + if (ix > 0x4016cbe3) { // |x| ~> 3*pi/4 + return -trig.__cosdf(if (sign) x + c2pio2 else x - c2pio2); + } else { + if (sign) { + return trig.__sindf(x + c1pio2); + } else { + return trig.__sindf(c1pio2 - x); + } + } + } + if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4 + if (ix > 0x40afeddf) { // |x| ~> 7*pi/4 + return trig.__cosdf(if (sign) x + c4pio2 else x - c4pio2); + } else { + if (sign) { + return trig.__sindf(-x - c3pio2); + } else { + return trig.__sindf(x - c3pio2); + } + } + } + + // cos(Inf or NaN) is NaN + if (ix >= 0x7f800000) { + return x - x; + } + + var y: f64 = undefined; + const n = rem_pio2f(x, &y); + return switch (n & 3) { + 0 => trig.__cosdf(y), + 1 => trig.__sindf(-y), + 2 => -trig.__cosdf(y), + else => trig.__sindf(y), + }; +} + +pub fn cos(x: f64) callconv(.C) f64 { + var ix = @bitCast(u64, x) >> 32; + ix &= 0x7fffffff; + + // |x| ~< pi/4 + if (ix <= 0x3fe921fb) { + if (ix < 0x3e46a09e) { // |x| < 2**-27 * sqrt(2) + // raise inexact if x!=0 + math.doNotOptimizeAway(x + 0x1p120); + return 1.0; + } + return trig.__cos(x, 0); + } + + // cos(Inf or NaN) is NaN + if (ix >= 0x7ff00000) { + return x - x; + } + + var y: [2]f64 = undefined; + const n = rem_pio2(x, &y); + return switch (n & 3) { + 0 => trig.__cos(y[0], y[1]), + 1 => 
-trig.__sin(y[0], y[1], 1), + 2 => -trig.__cos(y[0], y[1]), + else => trig.__sin(y[0], y[1], 1), + }; +} + +pub fn __cosx(a: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, cosq(a)); +} + +pub fn cosq(a: f128) callconv(.C) f128 { + // TODO: more correct implementation + return cos(@floatCast(f64, a)); +} + +test "cos32" { + const epsilon = 0.00001; + + try expect(math.approxEqAbs(f32, cosf(0.0), 1.0, epsilon)); + try expect(math.approxEqAbs(f32, cosf(0.2), 0.980067, epsilon)); + try expect(math.approxEqAbs(f32, cosf(0.8923), 0.627623, epsilon)); + try expect(math.approxEqAbs(f32, cosf(1.5), 0.070737, epsilon)); + try expect(math.approxEqAbs(f32, cosf(-1.5), 0.070737, epsilon)); + try expect(math.approxEqAbs(f32, cosf(37.45), 0.969132, epsilon)); + try expect(math.approxEqAbs(f32, cosf(89.123), 0.400798, epsilon)); +} + +test "cos64" { + const epsilon = 0.000001; + + try expect(math.approxEqAbs(f64, cos(0.0), 1.0, epsilon)); + try expect(math.approxEqAbs(f64, cos(0.2), 0.980067, epsilon)); + try expect(math.approxEqAbs(f64, cos(0.8923), 0.627623, epsilon)); + try expect(math.approxEqAbs(f64, cos(1.5), 0.070737, epsilon)); + try expect(math.approxEqAbs(f64, cos(-1.5), 0.070737, epsilon)); + try expect(math.approxEqAbs(f64, cos(37.45), 0.969132, epsilon)); + try expect(math.approxEqAbs(f64, cos(89.123), 0.40080, epsilon)); +} + +test "cos32.special" { + try expect(math.isNan(cosf(math.inf(f32)))); + try expect(math.isNan(cosf(-math.inf(f32)))); + try expect(math.isNan(cosf(math.nan(f32)))); +} + +test "cos64.special" { + try expect(math.isNan(cos(math.inf(f64)))); + try expect(math.isNan(cos(-math.inf(f64)))); + try expect(math.isNan(cos(math.nan(f64)))); +} diff --git a/lib/std/special/compiler_rt/divxf3_test.zig b/lib/std/special/compiler_rt/divxf3_test.zig index b79b90c6fb05..0ed2b7421736 100644 --- a/lib/std/special/compiler_rt/divxf3_test.zig +++ b/lib/std/special/compiler_rt/divxf3_test.zig @@ -30,9 +30,9 @@ fn test__divxf3(a: f80, b: f80) !void { const x_minus_eps = @bitCast(f80, (@bitCast(u80, x) - 1) | integerBit); // Make sure result is more accurate than the adjacent floats - const err_x = std.math.fabs(@mulAdd(f80, x, b, -a)); - const err_x_plus_eps = std.math.fabs(@mulAdd(f80, x_plus_eps, b, -a)); - const err_x_minus_eps = std.math.fabs(@mulAdd(f80, x_minus_eps, b, -a)); + const err_x = @fabs(@mulAdd(f80, x, b, -a)); + const err_x_plus_eps = @fabs(@mulAdd(f80, x_plus_eps, b, -a)); + const err_x_minus_eps = @fabs(@mulAdd(f80, x_minus_eps, b, -a)); try testing.expect(err_x_minus_eps > err_x); try testing.expect(err_x_plus_eps > err_x); diff --git a/lib/std/math/exp.zig b/lib/std/special/compiler_rt/exp.zig similarity index 74% rename from lib/std/math/exp.zig rename to lib/std/special/compiler_rt/exp.zig index 71a492c7ad29..0f129dfd4cf0 100644 --- a/lib/std/math/exp.zig +++ b/lib/std/special/compiler_rt/exp.zig @@ -4,25 +4,16 @@ // https://git.musl-libc.org/cgit/musl/tree/src/math/expf.c // https://git.musl-libc.org/cgit/musl/tree/src/math/exp.c -const std = @import("../std.zig"); +const std = @import("std"); const math = std.math; const expect = std.testing.expect; -/// Returns e raised to the power of x (e^x). 
-/// -/// Special Cases: -/// - exp(+inf) = +inf -/// - exp(nan) = nan -pub fn exp(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - return switch (T) { - f32 => exp32(x), - f64 => exp64(x), - else => @compileError("exp not implemented for " ++ @typeName(T)), - }; +pub fn __exph(a: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, expf(a)); } -fn exp32(x_: f32) f32 { +pub fn expf(x_: f32) callconv(.C) f32 { const half = [_]f32{ 0.5, -0.5 }; const ln2hi = 6.9314575195e-1; const ln2lo = 1.4286067653e-6; @@ -97,7 +88,7 @@ fn exp32(x_: f32) f32 { } } -fn exp64(x_: f64) f64 { +pub fn exp(x_: f64) callconv(.C) f64 { const half = [_]f64{ 0.5, -0.5 }; const ln2hi: f64 = 6.93147180369123816490e-01; const ln2lo: f64 = 1.90821492927058770002e-10; @@ -181,37 +172,42 @@ fn exp64(x_: f64) f64 { } } -test "math.exp" { - try expect(exp(@as(f32, 0.0)) == exp32(0.0)); - try expect(exp(@as(f64, 0.0)) == exp64(0.0)); +pub fn __expx(a: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, expq(a)); } -test "math.exp32" { +pub fn expq(a: f128) callconv(.C) f128 { + // TODO: more correct implementation + return exp(@floatCast(f64, a)); +} + +test "exp32" { const epsilon = 0.000001; - try expect(exp32(0.0) == 1.0); - try expect(math.approxEqAbs(f32, exp32(0.0), 1.0, epsilon)); - try expect(math.approxEqAbs(f32, exp32(0.2), 1.221403, epsilon)); - try expect(math.approxEqAbs(f32, exp32(0.8923), 2.440737, epsilon)); - try expect(math.approxEqAbs(f32, exp32(1.5), 4.481689, epsilon)); + try expect(expf(0.0) == 1.0); + try expect(math.approxEqAbs(f32, expf(0.0), 1.0, epsilon)); + try expect(math.approxEqAbs(f32, expf(0.2), 1.221403, epsilon)); + try expect(math.approxEqAbs(f32, expf(0.8923), 2.440737, epsilon)); + try expect(math.approxEqAbs(f32, expf(1.5), 4.481689, epsilon)); } -test "math.exp64" { +test "exp64" { const epsilon = 0.000001; - try expect(exp64(0.0) == 1.0); - try expect(math.approxEqAbs(f64, exp64(0.0), 1.0, epsilon)); - try expect(math.approxEqAbs(f64, exp64(0.2), 1.221403, epsilon)); - try expect(math.approxEqAbs(f64, exp64(0.8923), 2.440737, epsilon)); - try expect(math.approxEqAbs(f64, exp64(1.5), 4.481689, epsilon)); + try expect(exp(0.0) == 1.0); + try expect(math.approxEqAbs(f64, exp(0.0), 1.0, epsilon)); + try expect(math.approxEqAbs(f64, exp(0.2), 1.221403, epsilon)); + try expect(math.approxEqAbs(f64, exp(0.8923), 2.440737, epsilon)); + try expect(math.approxEqAbs(f64, exp(1.5), 4.481689, epsilon)); } -test "math.exp32.special" { - try expect(math.isPositiveInf(exp32(math.inf(f32)))); - try expect(math.isNan(exp32(math.nan(f32)))); +test "exp32.special" { + try expect(math.isPositiveInf(expf(math.inf(f32)))); + try expect(math.isNan(expf(math.nan(f32)))); } -test "math.exp64.special" { - try expect(math.isPositiveInf(exp64(math.inf(f64)))); - try expect(math.isNan(exp64(math.nan(f64)))); +test "exp64.special" { + try expect(math.isPositiveInf(exp(math.inf(f64)))); + try expect(math.isNan(exp(math.nan(f64)))); } diff --git a/lib/std/math/exp2.zig b/lib/std/special/compiler_rt/exp2.zig similarity index 89% rename from lib/std/math/exp2.zig rename to lib/std/special/compiler_rt/exp2.zig index 76530ec61fc9..53432a831d13 100644 --- a/lib/std/math/exp2.zig +++ b/lib/std/special/compiler_rt/exp2.zig @@ -4,44 +4,16 @@ // https://git.musl-libc.org/cgit/musl/tree/src/math/exp2f.c // https://git.musl-libc.org/cgit/musl/tree/src/math/exp2.c -const std = @import("../std.zig"); +const std = @import("std"); const math = std.math; 
const expect = std.testing.expect; -/// Returns 2 raised to the power of x (2^x). -/// -/// Special Cases: -/// - exp2(+inf) = +inf -/// - exp2(nan) = nan -pub fn exp2(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - return switch (T) { - f32 => exp2_32(x), - f64 => exp2_64(x), - else => @compileError("exp2 not implemented for " ++ @typeName(T)), - }; +pub fn __exp2h(x: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, exp2f(x)); } -const exp2ft = [_]f64{ - 0x1.6a09e667f3bcdp-1, - 0x1.7a11473eb0187p-1, - 0x1.8ace5422aa0dbp-1, - 0x1.9c49182a3f090p-1, - 0x1.ae89f995ad3adp-1, - 0x1.c199bdd85529cp-1, - 0x1.d5818dcfba487p-1, - 0x1.ea4afa2a490dap-1, - 0x1.0000000000000p+0, - 0x1.0b5586cf9890fp+0, - 0x1.172b83c7d517bp+0, - 0x1.2387a6e756238p+0, - 0x1.306fe0a31b715p+0, - 0x1.3dea64c123422p+0, - 0x1.4bfdad5362a27p+0, - 0x1.5ab07dd485429p+0, -}; - -fn exp2_32(x: f32) f32 { +pub fn exp2f(x: f32) callconv(.C) f32 { const tblsiz = @intCast(u32, exp2ft.len); const redux: f32 = 0x1.8p23 / @intToFloat(f32, tblsiz); const P1: f32 = 0x1.62e430p-1; @@ -98,6 +70,104 @@ fn exp2_32(x: f32) f32 { return @floatCast(f32, r * uk); } +pub fn exp2(x: f64) callconv(.C) f64 { + const tblsiz: u32 = @intCast(u32, exp2dt.len / 2); + const redux: f64 = 0x1.8p52 / @intToFloat(f64, tblsiz); + const P1: f64 = 0x1.62e42fefa39efp-1; + const P2: f64 = 0x1.ebfbdff82c575p-3; + const P3: f64 = 0x1.c6b08d704a0a6p-5; + const P4: f64 = 0x1.3b2ab88f70400p-7; + const P5: f64 = 0x1.5d88003875c74p-10; + + const ux = @bitCast(u64, x); + const ix = @intCast(u32, ux >> 32) & 0x7FFFFFFF; + + // TODO: This should be handled beneath. + if (math.isNan(x)) { + return math.nan(f64); + } + + // |x| >= 1022 or nan + if (ix >= 0x408FF000) { + // x >= 1024 or nan + if (ix >= 0x40900000 and ux >> 63 == 0) { + math.raiseOverflow(); + return math.inf(f64); + } + // -inf or -nan + if (ix >= 0x7FF00000) { + return -1 / x; + } + // x <= -1022 + if (ux >> 63 != 0) { + // underflow + if (x <= -1075 or x - 0x1.0p52 + 0x1.0p52 != x) { + math.doNotOptimizeAway(@floatCast(f32, -0x1.0p-149 / x)); + } + if (x <= -1075) { + return 0; + } + } + } + // |x| < 0x1p-54 + else if (ix < 0x3C900000) { + return 1.0 + x; + } + + // NOTE: musl relies on unsafe behaviours which are replicated below + // (addition overflow, division truncation, casting). Appears that this + // produces the intended result but should confirm how GCC/Clang handle this + // to ensure. 
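+ // (Specifically, `redux` is 0x1.8p52 / tblsiz, so after `uf = x + redux`
+ // the low bits of uf's mantissa hold round(x * tblsiz) as an integer;
+ // subtracting redux back out then leaves z = x - uf as the small
+ // remainder fed to the polynomial below.)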
+ + // reduce x + var uf: f64 = x + redux; + // NOTE: musl performs an implicit 64-bit to 32-bit u32 truncation here + var i_0: u32 = @truncate(u32, @bitCast(u64, uf)); + i_0 +%= tblsiz / 2; + + const k: u32 = i_0 / tblsiz * tblsiz; + const ik: i32 = @divTrunc(@bitCast(i32, k), tblsiz); + i_0 %= tblsiz; + uf -= redux; + + // r = exp2(y) = exp2t[i_0] * p(z - eps[i]) + var z: f64 = x - uf; + const t: f64 = exp2dt[@intCast(usize, 2 * i_0)]; + z -= exp2dt[@intCast(usize, 2 * i_0 + 1)]; + const r: f64 = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5)))); + + return math.scalbn(r, ik); +} + +pub fn __exp2x(x: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, exp2q(x)); +} + +pub fn exp2q(x: f128) callconv(.C) f128 { + // TODO: more correct implementation + return exp2(@floatCast(f64, x)); +} + +const exp2ft = [_]f64{ + 0x1.6a09e667f3bcdp-1, + 0x1.7a11473eb0187p-1, + 0x1.8ace5422aa0dbp-1, + 0x1.9c49182a3f090p-1, + 0x1.ae89f995ad3adp-1, + 0x1.c199bdd85529cp-1, + 0x1.d5818dcfba487p-1, + 0x1.ea4afa2a490dap-1, + 0x1.0000000000000p+0, + 0x1.0b5586cf9890fp+0, + 0x1.172b83c7d517bp+0, + 0x1.2387a6e756238p+0, + 0x1.306fe0a31b715p+0, + 0x1.3dea64c123422p+0, + 0x1.4bfdad5362a27p+0, + 0x1.5ab07dd485429p+0, +}; + const exp2dt = [_]f64{ // exp2(z + eps) eps 0x1.6a09e667f3d5dp-1, 0x1.9880p-44, @@ -358,108 +428,34 @@ const exp2dt = [_]f64{ 0x1.690f4b19e9471p+0, -0x1.9780p-45, }; -fn exp2_64(x: f64) f64 { - const tblsiz: u32 = @intCast(u32, exp2dt.len / 2); - const redux: f64 = 0x1.8p52 / @intToFloat(f64, tblsiz); - const P1: f64 = 0x1.62e42fefa39efp-1; - const P2: f64 = 0x1.ebfbdff82c575p-3; - const P3: f64 = 0x1.c6b08d704a0a6p-5; - const P4: f64 = 0x1.3b2ab88f70400p-7; - const P5: f64 = 0x1.5d88003875c74p-10; - - const ux = @bitCast(u64, x); - const ix = @intCast(u32, ux >> 32) & 0x7FFFFFFF; - - // TODO: This should be handled beneath. - if (math.isNan(x)) { - return math.nan(f64); - } - - // |x| >= 1022 or nan - if (ix >= 0x408FF000) { - // x >= 1024 or nan - if (ix >= 0x40900000 and ux >> 63 == 0) { - math.raiseOverflow(); - return math.inf(f64); - } - // -inf or -nan - if (ix >= 0x7FF00000) { - return -1 / x; - } - // x <= -1022 - if (ux >> 63 != 0) { - // underflow - if (x <= -1075 or x - 0x1.0p52 + 0x1.0p52 != x) { - math.doNotOptimizeAway(@floatCast(f32, -0x1.0p-149 / x)); - } - if (x <= -1075) { - return 0; - } - } - } - // |x| < 0x1p-54 - else if (ix < 0x3C900000) { - return 1.0 + x; - } - - // NOTE: musl relies on unsafe behaviours which are replicated below - // (addition overflow, division truncation, casting). Appears that this - // produces the intended result but should confirm how GCC/Clang handle this - // to ensure. 
- - // reduce x - var uf: f64 = x + redux; - // NOTE: musl performs an implicit 64-bit to 32-bit u32 truncation here - var i_0: u32 = @truncate(u32, @bitCast(u64, uf)); - i_0 +%= tblsiz / 2; - - const k: u32 = i_0 / tblsiz * tblsiz; - const ik: i32 = @divTrunc(@bitCast(i32, k), tblsiz); - i_0 %= tblsiz; - uf -= redux; - - // r = exp2(y) = exp2t[i_0] * p(z - eps[i]) - var z: f64 = x - uf; - const t: f64 = exp2dt[@intCast(usize, 2 * i_0)]; - z -= exp2dt[@intCast(usize, 2 * i_0 + 1)]; - const r: f64 = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5)))); - - return math.scalbn(r, ik); -} - -test "math.exp2" { - try expect(exp2(@as(f32, 0.8923)) == exp2_32(0.8923)); - try expect(exp2(@as(f64, 0.8923)) == exp2_64(0.8923)); -} - -test "math.exp2_32" { +test "exp2_32" { const epsilon = 0.000001; - try expect(exp2_32(0.0) == 1.0); - try expect(math.approxEqAbs(f32, exp2_32(0.2), 1.148698, epsilon)); - try expect(math.approxEqAbs(f32, exp2_32(0.8923), 1.856133, epsilon)); - try expect(math.approxEqAbs(f32, exp2_32(1.5), 2.828427, epsilon)); - try expect(math.approxEqAbs(f32, exp2_32(37.45), 187747237888, epsilon)); - try expect(math.approxEqAbs(f32, exp2_32(-1), 0.5, epsilon)); + try expect(exp2f(0.0) == 1.0); + try expect(math.approxEqAbs(f32, exp2f(0.2), 1.148698, epsilon)); + try expect(math.approxEqAbs(f32, exp2f(0.8923), 1.856133, epsilon)); + try expect(math.approxEqAbs(f32, exp2f(1.5), 2.828427, epsilon)); + try expect(math.approxEqAbs(f32, exp2f(37.45), 187747237888, epsilon)); + try expect(math.approxEqAbs(f32, exp2f(-1), 0.5, epsilon)); } -test "math.exp2_64" { +test "exp2_64" { const epsilon = 0.000001; - try expect(exp2_64(0.0) == 1.0); - try expect(math.approxEqAbs(f64, exp2_64(0.2), 1.148698, epsilon)); - try expect(math.approxEqAbs(f64, exp2_64(0.8923), 1.856133, epsilon)); - try expect(math.approxEqAbs(f64, exp2_64(1.5), 2.828427, epsilon)); - try expect(math.approxEqAbs(f64, exp2_64(-1), 0.5, epsilon)); - try expect(math.approxEqAbs(f64, exp2_64(-0x1.a05cc754481d1p-2), 0x1.824056efc687cp-1, epsilon)); + try expect(exp2(0.0) == 1.0); + try expect(math.approxEqAbs(f64, exp2(0.2), 1.148698, epsilon)); + try expect(math.approxEqAbs(f64, exp2(0.8923), 1.856133, epsilon)); + try expect(math.approxEqAbs(f64, exp2(1.5), 2.828427, epsilon)); + try expect(math.approxEqAbs(f64, exp2(-1), 0.5, epsilon)); + try expect(math.approxEqAbs(f64, exp2(-0x1.a05cc754481d1p-2), 0x1.824056efc687cp-1, epsilon)); } -test "math.exp2_32.special" { - try expect(math.isPositiveInf(exp2_32(math.inf(f32)))); - try expect(math.isNan(exp2_32(math.nan(f32)))); +test "exp2_32.special" { + try expect(math.isPositiveInf(exp2f(math.inf(f32)))); + try expect(math.isNan(exp2f(math.nan(f32)))); } -test "math.exp2_64.special" { - try expect(math.isPositiveInf(exp2_64(math.inf(f64)))); - try expect(math.isNan(exp2_64(math.nan(f64)))); +test "exp2_64.special" { + try expect(math.isPositiveInf(exp2(math.inf(f64)))); + try expect(math.isNan(exp2(math.nan(f64)))); } diff --git a/lib/std/special/compiler_rt/fabs.zig b/lib/std/special/compiler_rt/fabs.zig new file mode 100644 index 000000000000..fbef81fc9a4d --- /dev/null +++ b/lib/std/special/compiler_rt/fabs.zig @@ -0,0 +1,29 @@ +const std = @import("std"); + +pub fn __fabsh(a: f16) callconv(.C) f16 { + return generic_fabs(a); +} + +pub fn fabsf(a: f32) callconv(.C) f32 { + return generic_fabs(a); +} + +pub fn fabs(a: f64) callconv(.C) f64 { + return generic_fabs(a); +} + +pub fn __fabsx(a: f80) callconv(.C) f80 { + return generic_fabs(a); +} + +pub fn fabsq(a: f128) 
callconv(.C) f128 { + return generic_fabs(a); +} + +inline fn generic_fabs(x: anytype) @TypeOf(x) { + const T = @TypeOf(x); + const TBits = std.meta.Int(.unsigned, @typeInfo(T).Float.bits); + const float_bits = @bitCast(TBits, x); + const remove_sign = ~@as(TBits, 0) >> 1; + return @bitCast(T, float_bits & remove_sign); +} diff --git a/lib/std/math/floor.zig b/lib/std/special/compiler_rt/floor.zig similarity index 52% rename from lib/std/math/floor.zig rename to lib/std/special/compiler_rt/floor.zig index ab5ca3583b7d..f6df164b58cd 100644 --- a/lib/std/math/floor.zig +++ b/lib/std/special/compiler_rt/floor.zig @@ -4,32 +4,11 @@ // https://git.musl-libc.org/cgit/musl/tree/src/math/floorf.c // https://git.musl-libc.org/cgit/musl/tree/src/math/floor.c -const expect = std.testing.expect; -const std = @import("../std.zig"); +const std = @import("std"); const math = std.math; +const expect = std.testing.expect; -/// Returns the greatest integer value less than or equal to x. -/// -/// Special Cases: -/// - floor(+-0) = +-0 -/// - floor(+-inf) = +-inf -/// - floor(nan) = nan -pub fn floor(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - return switch (T) { - f16 => floor16(x), - f32 => floor32(x), - f64 => floor64(x), - f128 => floor128(x), - - // TODO this is not correct for some targets - c_longdouble => @floatCast(c_longdouble, floor128(x)), - - else => @compileError("floor not implemented for " ++ @typeName(T)), - }; -} - -fn floor16(x: f16) f16 { +pub fn __floorh(x: f16) callconv(.C) f16 { var u = @bitCast(u16, x); const e = @intCast(i16, (u >> 10) & 31) - 15; var m: u16 = undefined; @@ -63,7 +42,7 @@ fn floor16(x: f16) f16 { } } -fn floor32(x: f32) f32 { +pub fn floorf(x: f32) callconv(.C) f32 { var u = @bitCast(u32, x); const e = @intCast(i32, (u >> 23) & 0xFF) - 0x7F; var m: u32 = undefined; @@ -97,7 +76,7 @@ fn floor32(x: f32) f32 { } } -fn floor64(x: f64) f64 { +pub fn floor(x: f64) callconv(.C) f64 { const f64_toint = 1.0 / math.floatEps(f64); const u = @bitCast(u64, x); @@ -128,7 +107,12 @@ fn floor64(x: f64) f64 { } } -fn floor128(x: f128) f128 { +pub fn __floorx(x: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, floorq(x)); +} + +pub fn floorq(x: f128) callconv(.C) f128 { const f128_toint = 1.0 / math.floatEps(f128); const u = @bitCast(u128, x); @@ -157,65 +141,58 @@ fn floor128(x: f128) f128 { } } -test "math.floor" { - try expect(floor(@as(f16, 1.3)) == floor16(1.3)); - try expect(floor(@as(f32, 1.3)) == floor32(1.3)); - try expect(floor(@as(f64, 1.3)) == floor64(1.3)); - try expect(floor(@as(f128, 1.3)) == floor128(1.3)); -} - -test "math.floor16" { - try expect(floor16(1.3) == 1.0); - try expect(floor16(-1.3) == -2.0); - try expect(floor16(0.2) == 0.0); +test "floor16" { + try expect(__floorh(1.3) == 1.0); + try expect(__floorh(-1.3) == -2.0); + try expect(__floorh(0.2) == 0.0); } -test "math.floor32" { - try expect(floor32(1.3) == 1.0); - try expect(floor32(-1.3) == -2.0); - try expect(floor32(0.2) == 0.0); +test "floor32" { + try expect(floorf(1.3) == 1.0); + try expect(floorf(-1.3) == -2.0); + try expect(floorf(0.2) == 0.0); } -test "math.floor64" { - try expect(floor64(1.3) == 1.0); - try expect(floor64(-1.3) == -2.0); - try expect(floor64(0.2) == 0.0); +test "floor64" { + try expect(floor(1.3) == 1.0); + try expect(floor(-1.3) == -2.0); + try expect(floor(0.2) == 0.0); } -test "math.floor128" { - try expect(floor128(1.3) == 1.0); - try expect(floor128(-1.3) == -2.0); - try expect(floor128(0.2) == 0.0); +test "floor128" { + try 
expect(floorq(1.3) == 1.0); + try expect(floorq(-1.3) == -2.0); + try expect(floorq(0.2) == 0.0); } -test "math.floor16.special" { - try expect(floor16(0.0) == 0.0); - try expect(floor16(-0.0) == -0.0); - try expect(math.isPositiveInf(floor16(math.inf(f16)))); - try expect(math.isNegativeInf(floor16(-math.inf(f16)))); - try expect(math.isNan(floor16(math.nan(f16)))); +test "floor16.special" { + try expect(__floorh(0.0) == 0.0); + try expect(__floorh(-0.0) == -0.0); + try expect(math.isPositiveInf(__floorh(math.inf(f16)))); + try expect(math.isNegativeInf(__floorh(-math.inf(f16)))); + try expect(math.isNan(__floorh(math.nan(f16)))); } -test "math.floor32.special" { - try expect(floor32(0.0) == 0.0); - try expect(floor32(-0.0) == -0.0); - try expect(math.isPositiveInf(floor32(math.inf(f32)))); - try expect(math.isNegativeInf(floor32(-math.inf(f32)))); - try expect(math.isNan(floor32(math.nan(f32)))); +test "floor32.special" { + try expect(floorf(0.0) == 0.0); + try expect(floorf(-0.0) == -0.0); + try expect(math.isPositiveInf(floorf(math.inf(f32)))); + try expect(math.isNegativeInf(floorf(-math.inf(f32)))); + try expect(math.isNan(floorf(math.nan(f32)))); } -test "math.floor64.special" { - try expect(floor64(0.0) == 0.0); - try expect(floor64(-0.0) == -0.0); - try expect(math.isPositiveInf(floor64(math.inf(f64)))); - try expect(math.isNegativeInf(floor64(-math.inf(f64)))); - try expect(math.isNan(floor64(math.nan(f64)))); +test "floor64.special" { + try expect(floor(0.0) == 0.0); + try expect(floor(-0.0) == -0.0); + try expect(math.isPositiveInf(floor(math.inf(f64)))); + try expect(math.isNegativeInf(floor(-math.inf(f64)))); + try expect(math.isNan(floor(math.nan(f64)))); } -test "math.floor128.special" { - try expect(floor128(0.0) == 0.0); - try expect(floor128(-0.0) == -0.0); - try expect(math.isPositiveInf(floor128(math.inf(f128)))); - try expect(math.isNegativeInf(floor128(-math.inf(f128)))); - try expect(math.isNan(floor128(math.nan(f128)))); +test "floor128.special" { + try expect(floorq(0.0) == 0.0); + try expect(floorq(-0.0) == -0.0); + try expect(math.isPositiveInf(floorq(math.inf(f128)))); + try expect(math.isNegativeInf(floorq(-math.inf(f128)))); + try expect(math.isNan(floorq(math.nan(f128)))); } diff --git a/lib/std/math/fma.zig b/lib/std/special/compiler_rt/fma.zig similarity index 76% rename from lib/std/math/fma.zig rename to lib/std/special/compiler_rt/fma.zig index 7afc6e557e88..4c603bf09570 100644 --- a/lib/std/math/fma.zig +++ b/lib/std/special/compiler_rt/fma.zig @@ -5,27 +5,16 @@ // https://git.musl-libc.org/cgit/musl/tree/src/math/fmaf.c // https://git.musl-libc.org/cgit/musl/tree/src/math/fma.c -const std = @import("../std.zig"); +const std = @import("std"); const math = std.math; const expect = std.testing.expect; -/// Returns x * y + z with a single rounding error. 
-pub fn fma(comptime T: type, x: T, y: T, z: T) T { - return switch (T) { - f32 => fma32(x, y, z), - f64 => fma64(x, y, z), - f128 => fma128(x, y, z), - - // TODO this is not correct for some targets - c_longdouble => @floatCast(c_longdouble, fma128(x, y, z)), - - f80 => @floatCast(f80, fma128(x, y, z)), - - else => @compileError("fma not implemented for " ++ @typeName(T)), - }; +pub fn __fmah(x: f16, y: f16, z: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, fmaf(x, y, z)); } -fn fma32(x: f32, y: f32, z: f32) f32 { +pub fn fmaf(x: f32, y: f32, z: f32) callconv(.C) f32 { const xy = @as(f64, x) * y; const xy_z = xy + z; const u = @bitCast(u64, xy_z); @@ -39,8 +28,8 @@ fn fma32(x: f32, y: f32, z: f32) f32 { } } -// NOTE: Upstream fma.c has been rewritten completely to raise fp exceptions more accurately. -fn fma64(x: f64, y: f64, z: f64) f64 { +/// NOTE: Upstream fma.c has been rewritten completely to raise fp exceptions more accurately. +pub fn fma(x: f64, y: f64, z: f64) callconv(.C) f64 { if (!math.isFinite(x) or !math.isFinite(y)) { return x * y + z; } @@ -87,6 +76,65 @@ fn fma64(x: f64, y: f64, z: f64) f64 { } } +pub fn __fmax(a: f80, b: f80, c: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, fmaq(a, b, c)); +} + +/// Fused multiply-add: Compute x * y + z with a single rounding error. +/// +/// We use scaling to avoid overflow/underflow, along with the +/// canonical precision-doubling technique adapted from: +/// +/// Dekker, T. A Floating-Point Technique for Extending the +/// Available Precision. Numer. Math. 18, 224-242 (1971). +pub fn fmaq(x: f128, y: f128, z: f128) callconv(.C) f128 { + if (!math.isFinite(x) or !math.isFinite(y)) { + return x * y + z; + } + if (!math.isFinite(z)) { + return z; + } + if (x == 0.0 or y == 0.0) { + return x * y + z; + } + if (z == 0.0) { + return x * y; + } + + const x1 = math.frexp(x); + var ex = x1.exponent; + var xs = x1.significand; + const x2 = math.frexp(y); + var ey = x2.exponent; + var ys = x2.significand; + const x3 = math.frexp(z); + var ez = x3.exponent; + var zs = x3.significand; + + var spread = ex + ey - ez; + if (spread <= 113 * 2) { + zs = math.scalbn(zs, -spread); + } else { + zs = math.copysign(f128, math.floatMin(f128), zs); + } + + const xy = dd_mul128(xs, ys); + const r = dd_add128(xy.hi, zs); + spread = ex + ey; + + if (r.hi == 0.0) { + return xy.hi + zs + math.scalbn(xy.lo, spread); + } + + const adj = add_adjusted128(r.lo, xy.lo); + if (spread + math.ilogb(r.hi) > -16383) { + return math.scalbn(r.hi + adj, spread); + } else { + return add_and_denorm128(r.hi, adj, spread); + } +} + const dd = struct { hi: f64, lo: f64, @@ -242,98 +290,38 @@ fn dd_mul128(a: f128, b: f128) dd128 { return ret; } -/// Fused multiply-add: Compute x * y + z with a single rounding error. -/// -/// We use scaling to avoid overflow/underflow, along with the -/// canonical precision-doubling technique adapted from: -/// -/// Dekker, T. A Floating-Point Technique for Extending the -/// Available Precision. Numer. Math. 18, 224-242 (1971). 
-fn fma128(x: f128, y: f128, z: f128) f128 { - if (!math.isFinite(x) or !math.isFinite(y)) { - return x * y + z; - } - if (!math.isFinite(z)) { - return z; - } - if (x == 0.0 or y == 0.0) { - return x * y + z; - } - if (z == 0.0) { - return x * y; - } - - const x1 = math.frexp(x); - var ex = x1.exponent; - var xs = x1.significand; - const x2 = math.frexp(y); - var ey = x2.exponent; - var ys = x2.significand; - const x3 = math.frexp(z); - var ez = x3.exponent; - var zs = x3.significand; - - var spread = ex + ey - ez; - if (spread <= 113 * 2) { - zs = math.scalbn(zs, -spread); - } else { - zs = math.copysign(f128, math.floatMin(f128), zs); - } - - const xy = dd_mul128(xs, ys); - const r = dd_add128(xy.hi, zs); - spread = ex + ey; - - if (r.hi == 0.0) { - return xy.hi + zs + math.scalbn(xy.lo, spread); - } - - const adj = add_adjusted128(r.lo, xy.lo); - if (spread + math.ilogb(r.hi) > -16383) { - return math.scalbn(r.hi + adj, spread); - } else { - return add_and_denorm128(r.hi, adj, spread); - } -} - -test "type dispatch" { - try expect(fma(f32, 0.0, 1.0, 1.0) == fma32(0.0, 1.0, 1.0)); - try expect(fma(f64, 0.0, 1.0, 1.0) == fma64(0.0, 1.0, 1.0)); - try expect(fma(f128, 0.0, 1.0, 1.0) == fma128(0.0, 1.0, 1.0)); -} - test "32" { const epsilon = 0.000001; - try expect(math.approxEqAbs(f32, fma32(0.0, 5.0, 9.124), 9.124, epsilon)); - try expect(math.approxEqAbs(f32, fma32(0.2, 5.0, 9.124), 10.124, epsilon)); - try expect(math.approxEqAbs(f32, fma32(0.8923, 5.0, 9.124), 13.5855, epsilon)); - try expect(math.approxEqAbs(f32, fma32(1.5, 5.0, 9.124), 16.624, epsilon)); - try expect(math.approxEqAbs(f32, fma32(37.45, 5.0, 9.124), 196.374004, epsilon)); - try expect(math.approxEqAbs(f32, fma32(89.123, 5.0, 9.124), 454.739005, epsilon)); - try expect(math.approxEqAbs(f32, fma32(123123.234375, 5.0, 9.124), 615625.295875, epsilon)); + try expect(math.approxEqAbs(f32, fmaf(0.0, 5.0, 9.124), 9.124, epsilon)); + try expect(math.approxEqAbs(f32, fmaf(0.2, 5.0, 9.124), 10.124, epsilon)); + try expect(math.approxEqAbs(f32, fmaf(0.8923, 5.0, 9.124), 13.5855, epsilon)); + try expect(math.approxEqAbs(f32, fmaf(1.5, 5.0, 9.124), 16.624, epsilon)); + try expect(math.approxEqAbs(f32, fmaf(37.45, 5.0, 9.124), 196.374004, epsilon)); + try expect(math.approxEqAbs(f32, fmaf(89.123, 5.0, 9.124), 454.739005, epsilon)); + try expect(math.approxEqAbs(f32, fmaf(123123.234375, 5.0, 9.124), 615625.295875, epsilon)); } test "64" { const epsilon = 0.000001; - try expect(math.approxEqAbs(f64, fma64(0.0, 5.0, 9.124), 9.124, epsilon)); - try expect(math.approxEqAbs(f64, fma64(0.2, 5.0, 9.124), 10.124, epsilon)); - try expect(math.approxEqAbs(f64, fma64(0.8923, 5.0, 9.124), 13.5855, epsilon)); - try expect(math.approxEqAbs(f64, fma64(1.5, 5.0, 9.124), 16.624, epsilon)); - try expect(math.approxEqAbs(f64, fma64(37.45, 5.0, 9.124), 196.374, epsilon)); - try expect(math.approxEqAbs(f64, fma64(89.123, 5.0, 9.124), 454.739, epsilon)); - try expect(math.approxEqAbs(f64, fma64(123123.234375, 5.0, 9.124), 615625.295875, epsilon)); + try expect(math.approxEqAbs(f64, fma(0.0, 5.0, 9.124), 9.124, epsilon)); + try expect(math.approxEqAbs(f64, fma(0.2, 5.0, 9.124), 10.124, epsilon)); + try expect(math.approxEqAbs(f64, fma(0.8923, 5.0, 9.124), 13.5855, epsilon)); + try expect(math.approxEqAbs(f64, fma(1.5, 5.0, 9.124), 16.624, epsilon)); + try expect(math.approxEqAbs(f64, fma(37.45, 5.0, 9.124), 196.374, epsilon)); + try expect(math.approxEqAbs(f64, fma(89.123, 5.0, 9.124), 454.739, epsilon)); + try expect(math.approxEqAbs(f64, 
fma(123123.234375, 5.0, 9.124), 615625.295875, epsilon)); } test "128" { const epsilon = 0.000001; - try expect(math.approxEqAbs(f128, fma128(0.0, 5.0, 9.124), 9.124, epsilon)); - try expect(math.approxEqAbs(f128, fma128(0.2, 5.0, 9.124), 10.124, epsilon)); - try expect(math.approxEqAbs(f128, fma128(0.8923, 5.0, 9.124), 13.5855, epsilon)); - try expect(math.approxEqAbs(f128, fma128(1.5, 5.0, 9.124), 16.624, epsilon)); - try expect(math.approxEqAbs(f128, fma128(37.45, 5.0, 9.124), 196.374, epsilon)); - try expect(math.approxEqAbs(f128, fma128(89.123, 5.0, 9.124), 454.739, epsilon)); - try expect(math.approxEqAbs(f128, fma128(123123.234375, 5.0, 9.124), 615625.295875, epsilon)); + try expect(math.approxEqAbs(f128, fmaq(0.0, 5.0, 9.124), 9.124, epsilon)); + try expect(math.approxEqAbs(f128, fmaq(0.2, 5.0, 9.124), 10.124, epsilon)); + try expect(math.approxEqAbs(f128, fmaq(0.8923, 5.0, 9.124), 13.5855, epsilon)); + try expect(math.approxEqAbs(f128, fmaq(1.5, 5.0, 9.124), 16.624, epsilon)); + try expect(math.approxEqAbs(f128, fmaq(37.45, 5.0, 9.124), 196.374, epsilon)); + try expect(math.approxEqAbs(f128, fmaq(89.123, 5.0, 9.124), 454.739, epsilon)); + try expect(math.approxEqAbs(f128, fmaq(123123.234375, 5.0, 9.124), 615625.295875, epsilon)); } diff --git a/lib/std/special/compiler_rt/fmax.zig b/lib/std/special/compiler_rt/fmax.zig new file mode 100644 index 000000000000..a5bd68cd7428 --- /dev/null +++ b/lib/std/special/compiler_rt/fmax.zig @@ -0,0 +1,43 @@ +const std = @import("std"); +const math = std.math; + +pub fn __fmaxh(x: f16, y: f16) callconv(.C) f16 { + return generic_fmax(f16, x, y); +} + +pub fn fmaxf(x: f32, y: f32) callconv(.C) f32 { + return generic_fmax(f32, x, y); +} + +pub fn fmax(x: f64, y: f64) callconv(.C) f64 { + return generic_fmax(f64, x, y); +} + +pub fn __fmaxx(x: f80, y: f80) callconv(.C) f80 { + return generic_fmax(f80, x, y); +} + +pub fn fmaxq(x: f128, y: f128) callconv(.C) f128 { + return generic_fmax(f128, x, y); +} + +inline fn generic_fmax(comptime T: type, x: T, y: T) T { + if (math.isNan(x)) + return y; + if (math.isNan(y)) + return x; + return if (x < y) y else x; +} + +test "generic_fmax" { + inline for ([_]type{ f32, f64, c_longdouble, f80, f128 }) |T| { + const nan_val = math.nan(T); + + try std.testing.expect(math.isNan(generic_fmax(T, nan_val, nan_val))); + try std.testing.expectEqual(@as(T, 1.0), generic_fmax(T, nan_val, 1.0)); + try std.testing.expectEqual(@as(T, 1.0), generic_fmax(T, 1.0, nan_val)); + + try std.testing.expectEqual(@as(T, 10.0), generic_fmax(T, 1.0, 10.0)); + try std.testing.expectEqual(@as(T, 1.0), generic_fmax(T, 1.0, -1.0)); + } +} diff --git a/lib/std/special/compiler_rt/fmin.zig b/lib/std/special/compiler_rt/fmin.zig new file mode 100644 index 000000000000..cc4dbf082bc7 --- /dev/null +++ b/lib/std/special/compiler_rt/fmin.zig @@ -0,0 +1,43 @@ +const std = @import("std"); +const math = std.math; + +pub fn __fminh(x: f16, y: f16) callconv(.C) f16 { + return generic_fmin(f16, x, y); +} + +pub fn fminf(x: f32, y: f32) callconv(.C) f32 { + return generic_fmin(f32, x, y); +} + +pub fn fmin(x: f64, y: f64) callconv(.C) f64 { + return generic_fmin(f64, x, y); +} + +pub fn __fminx(x: f80, y: f80) callconv(.C) f80 { + return generic_fmin(f80, x, y); +} + +pub fn fminq(x: f128, y: f128) callconv(.C) f128 { + return generic_fmin(f128, x, y); +} + +inline fn generic_fmin(comptime T: type, x: T, y: T) T { + if (math.isNan(x)) + return y; + if (math.isNan(y)) + return x; + return if (x < y) x else y; +} + +test "generic_fmin" { + inline for 
([_]type{ f32, f64, c_longdouble, f80, f128 }) |T| { + const nan_val = math.nan(T); + + try std.testing.expect(math.isNan(generic_fmin(T, nan_val, nan_val))); + try std.testing.expectEqual(@as(T, 1.0), generic_fmin(T, nan_val, 1.0)); + try std.testing.expectEqual(@as(T, 1.0), generic_fmin(T, 1.0, nan_val)); + + try std.testing.expectEqual(@as(T, 1.0), generic_fmin(T, 1.0, 10.0)); + try std.testing.expectEqual(@as(T, -1.0), generic_fmin(T, 1.0, -1.0)); + } +} diff --git a/lib/std/special/compiler_rt/fmod.zig b/lib/std/special/compiler_rt/fmod.zig new file mode 100644 index 000000000000..b9a5710b9cf1 --- /dev/null +++ b/lib/std/special/compiler_rt/fmod.zig @@ -0,0 +1,351 @@ +const builtin = @import("builtin"); +const std = @import("std"); +const math = std.math; +const assert = std.debug.assert; +const normalize = @import("divdf3.zig").normalize; + +pub fn __fmodh(x: f16, y: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, fmodf(x, y)); +} + +pub fn fmodf(x: f32, y: f32) callconv(.C) f32 { + return generic_fmod(f32, x, y); +} + +pub fn fmod(x: f64, y: f64) callconv(.C) f64 { + return generic_fmod(f64, x, y); +} + +/// fmodx - floating modulo large, returns the remainder of division for f80 types +/// Logic and flow heavily inspired by MUSL fmodl for 113 mantissa digits +pub fn __fmodx(a: f80, b: f80) callconv(.C) f80 { + @setRuntimeSafety(builtin.is_test); + + const T = f80; + const Z = std.meta.Int(.unsigned, @bitSizeOf(T)); + + const significandBits = math.floatMantissaBits(T); + const fractionalBits = math.floatFractionalBits(T); + const exponentBits = math.floatExponentBits(T); + + const signBit = (@as(Z, 1) << (significandBits + exponentBits)); + const maxExponent = ((1 << exponentBits) - 1); + + var aRep = @bitCast(Z, a); + var bRep = @bitCast(Z, b); + + const signA = aRep & signBit; + var expA = @intCast(i32, (@bitCast(Z, a) >> significandBits) & maxExponent); + var expB = @intCast(i32, (@bitCast(Z, b) >> significandBits) & maxExponent); + + // There are 3 cases where the answer is undefined, check for: + // - fmodx(val, 0) + // - fmodx(val, NaN) + // - fmodx(inf, val) + // The sign on checked values does not matter. 
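+    //   For example, __fmodx(1.0, 0.0), __fmodx(2.0, math.nan(f80)) and
+    //   __fmodx(math.inf(f80), 2.0) must each return a NaN (illustrative
+    //   inputs; C99 requires NaN for all three cases).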
+    //   Doing (a * b) / (a * b) produces undefined results
+    //   because the three cases always produce undefined calculations:
+    //   - 0 / 0
+    //   - val * NaN
+    //   - inf / inf
+    if (b == 0 or math.isNan(b) or expA == maxExponent) {
+        return (a * b) / (a * b);
+    }
+
+    // Remove the sign from both
+    aRep &= ~signBit;
+    bRep &= ~signBit;
+    if (aRep <= bRep) {
+        if (aRep == bRep) {
+            return 0 * a;
+        }
+        return a;
+    }
+
+    if (expA == 0) expA = normalize(f80, &aRep);
+    if (expB == 0) expB = normalize(f80, &bRep);
+
+    var highA: u64 = 0;
+    var highB: u64 = 0;
+    var lowA: u64 = @truncate(u64, aRep);
+    var lowB: u64 = @truncate(u64, bRep);
+
+    while (expA > expB) : (expA -= 1) {
+        var high = highA -% highB;
+        var low = lowA -% lowB;
+        if (lowA < lowB) {
+            high -%= 1;
+        }
+        if (high >> 63 == 0) {
+            if ((high | low) == 0) {
+                return 0 * a;
+            }
+            highA = 2 *% high + (low >> 63);
+            lowA = 2 *% low;
+        } else {
+            highA = 2 *% highA + (lowA >> 63);
+            lowA = 2 *% lowA;
+        }
+    }
+
+    var high = highA -% highB;
+    var low = lowA -% lowB;
+    if (lowA < lowB) {
+        high -%= 1;
+    }
+    if (high >> 63 == 0) {
+        if ((high | low) == 0) {
+            return 0 * a;
+        }
+        highA = high;
+        lowA = low;
+    }
+
+    while ((lowA >> fractionalBits) == 0) {
+        lowA = 2 *% lowA;
+        expA = expA - 1;
+    }
+
+    // Combine the exponent with the sign and significand, normalize if happened to be denormalized
+    if (expA < -fractionalBits) {
+        return @bitCast(T, signA);
+    } else if (expA <= 0) {
+        return @bitCast(T, (lowA >> @intCast(math.Log2Int(u64), 1 - expA)) | signA);
+    } else {
+        return @bitCast(T, lowA | (@as(Z, @intCast(u16, expA)) << significandBits) | signA);
+    }
+}
+
+/// fmodq - floating modulo large, returns the remainder of division for f128 types
+/// Logic and flow heavily inspired by MUSL fmodl for 113 mantissa digits
+pub fn fmodq(a: f128, b: f128) callconv(.C) f128 {
+    @setRuntimeSafety(builtin.is_test);
+    var amod = a;
+    var bmod = b;
+    const aPtr_u64 = @ptrCast([*]u64, &amod);
+    const bPtr_u64 = @ptrCast([*]u64, &bmod);
+    const aPtr_u16 = @ptrCast([*]u16, &amod);
+    const bPtr_u16 = @ptrCast([*]u16, &bmod);
+
+    const exp_and_sign_index = comptime switch (builtin.target.cpu.arch.endian()) {
+        .Little => 7,
+        .Big => 0,
+    };
+    const low_index = comptime switch (builtin.target.cpu.arch.endian()) {
+        .Little => 0,
+        .Big => 1,
+    };
+    const high_index = comptime switch (builtin.target.cpu.arch.endian()) {
+        .Little => 1,
+        .Big => 0,
+    };
+
+    const signA = aPtr_u16[exp_and_sign_index] & 0x8000;
+    var expA = @intCast(i32, (aPtr_u16[exp_and_sign_index] & 0x7fff));
+    var expB = @intCast(i32, (bPtr_u16[exp_and_sign_index] & 0x7fff));
+
+    // There are 3 cases where the answer is undefined, check for:
+    //   - fmodq(val, 0)
+    //   - fmodq(val, NaN)
+    //   - fmodq(inf, val)
+    //   The sign on checked values does not matter.
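+    //   As a concrete illustration (hypothetical sample input): fmodq(1.0, 0.0)
+    //   evaluates the expression below as (1.0 * 0.0) / (1.0 * 0.0) = 0.0 / 0.0,
+    //   which yields the required NaN.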
+    //   Doing (a * b) / (a * b) produces undefined results
+    //   because the three cases always produce undefined calculations:
+    //   - 0 / 0
+    //   - val * NaN
+    //   - inf / inf
+    if (b == 0 or std.math.isNan(b) or expA == 0x7fff) {
+        return (a * b) / (a * b);
+    }
+
+    // Remove the sign from both
+    aPtr_u16[exp_and_sign_index] = @bitCast(u16, @intCast(i16, expA));
+    bPtr_u16[exp_and_sign_index] = @bitCast(u16, @intCast(i16, expB));
+    if (amod <= bmod) {
+        if (amod == bmod) {
+            return 0 * a;
+        }
+        return a;
+    }
+
+    if (expA == 0) {
+        amod *= 0x1p120;
+        expA = @as(i32, aPtr_u16[exp_and_sign_index]) - 120;
+    }
+
+    if (expB == 0) {
+        bmod *= 0x1p120;
+        expB = @as(i32, bPtr_u16[exp_and_sign_index]) - 120;
+    }
+
+    // OR in extra non-stored mantissa digit
+    var highA: u64 = (aPtr_u64[high_index] & (std.math.maxInt(u64) >> 16)) | 1 << 48;
+    var highB: u64 = (bPtr_u64[high_index] & (std.math.maxInt(u64) >> 16)) | 1 << 48;
+    var lowA: u64 = aPtr_u64[low_index];
+    var lowB: u64 = bPtr_u64[low_index];
+
+    while (expA > expB) : (expA -= 1) {
+        var high = highA -% highB;
+        var low = lowA -% lowB;
+        if (lowA < lowB) {
+            high -%= 1;
+        }
+        if (high >> 63 == 0) {
+            if ((high | low) == 0) {
+                return 0 * a;
+            }
+            highA = 2 *% high + (low >> 63);
+            lowA = 2 *% low;
+        } else {
+            highA = 2 *% highA + (lowA >> 63);
+            lowA = 2 *% lowA;
+        }
+    }
+
+    var high = highA -% highB;
+    var low = lowA -% lowB;
+    if (lowA < lowB) {
+        high -= 1;
+    }
+    if (high >> 63 == 0) {
+        if ((high | low) == 0) {
+            return 0 * a;
+        }
+        highA = high;
+        lowA = low;
+    }
+
+    while (highA >> 48 == 0) {
+        highA = 2 *% highA + (lowA >> 63);
+        lowA = 2 *% lowA;
+        expA = expA - 1;
+    }
+
+    // Overwrite the current amod with the values in highA and lowA
+    aPtr_u64[high_index] = highA;
+    aPtr_u64[low_index] = lowA;
+
+    // Combine the exponent with the sign, normalize if happened to be denormalized
+    if (expA <= 0) {
+        aPtr_u16[exp_and_sign_index] = @truncate(u16, @bitCast(u32, (expA +% 120))) | signA;
+        amod *= 0x1p-120;
+    } else {
+        aPtr_u16[exp_and_sign_index] = @truncate(u16, @bitCast(u32, expA)) | signA;
+    }
+
+    return amod;
+}
+
+inline fn generic_fmod(comptime T: type, x: T, y: T) T {
+    @setRuntimeSafety(false);
+
+    const bits = @typeInfo(T).Float.bits;
+    const uint = std.meta.Int(.unsigned, bits);
+    const log2uint = math.Log2Int(uint);
+    comptime assert(T == f32 or T == f64);
+    const digits = if (T == f32) 23 else 52;
+    const exp_bits = if (T == f32) 9 else 12;
+    const bits_minus_1 = bits - 1;
+    const mask = if (T == f32) 0xff else 0x7ff;
+    var ux = @bitCast(uint, x);
+    var uy = @bitCast(uint, y);
+    var ex = @intCast(i32, (ux >> digits) & mask);
+    var ey = @intCast(i32, (uy >> digits) & mask);
+    const sx = if (T == f32) @intCast(u32, ux & 0x80000000) else @intCast(i32, ux >> bits_minus_1);
+    var i: uint = undefined;
+
+    if (uy << 1 == 0 or math.isNan(@bitCast(T, uy)) or ex == mask)
+        return (x * y) / (x * y);
+
+    if (ux << 1 <= uy << 1) {
+        if (ux << 1 == uy << 1)
+            return 0 * x;
+        return x;
+    }
+
+    // normalize x and y
+    if (ex == 0) {
+        i = ux << exp_bits;
+        while (i >> bits_minus_1 == 0) : ({
+            ex -= 1;
+            i <<= 1;
+        }) {}
+        ux <<= @intCast(log2uint, @bitCast(u32, -ex + 1));
+    } else {
+        ux &= math.maxInt(uint) >> exp_bits;
+        ux |= 1 << digits;
+    }
+    if (ey == 0) {
+        i = uy << exp_bits;
+        while (i >> bits_minus_1 == 0) : ({
+            ey -= 1;
+            i <<= 1;
+        }) {}
+        uy <<= @intCast(log2uint, @bitCast(u32, -ey + 1));
+    } else {
+        uy &= math.maxInt(uint) >> exp_bits;
+        uy |= 1 << digits;
+    }
+
+    // x mod y
+    while (ex > ey) : (ex -= 1) {
+        i = ux -% uy;
+        if (i >>
bits_minus_1 == 0) { + if (i == 0) + return 0 * x; + ux = i; + } + ux <<= 1; + } + i = ux -% uy; + if (i >> bits_minus_1 == 0) { + if (i == 0) + return 0 * x; + ux = i; + } + while (ux >> digits == 0) : ({ + ux <<= 1; + ex -= 1; + }) {} + + // scale result up + if (ex > 0) { + ux -%= 1 << digits; + ux |= @as(uint, @bitCast(u32, ex)) << digits; + } else { + ux >>= @intCast(log2uint, @bitCast(u32, -ex + 1)); + } + if (T == f32) { + ux |= sx; + } else { + ux |= @intCast(uint, sx) << bits_minus_1; + } + return @bitCast(T, ux); +} + +test "fmod, fmodf" { + inline for ([_]type{ f32, f64 }) |T| { + const nan_val = math.nan(T); + const inf_val = math.inf(T); + + try std.testing.expect(math.isNan(generic_fmod(T, nan_val, 1.0))); + try std.testing.expect(math.isNan(generic_fmod(T, 1.0, nan_val))); + try std.testing.expect(math.isNan(generic_fmod(T, inf_val, 1.0))); + try std.testing.expect(math.isNan(generic_fmod(T, 0.0, 0.0))); + try std.testing.expect(math.isNan(generic_fmod(T, 1.0, 0.0))); + + try std.testing.expectEqual(@as(T, 0.0), generic_fmod(T, 0.0, 2.0)); + try std.testing.expectEqual(@as(T, -0.0), generic_fmod(T, -0.0, 2.0)); + + try std.testing.expectEqual(@as(T, -2.0), generic_fmod(T, -32.0, 10.0)); + try std.testing.expectEqual(@as(T, -2.0), generic_fmod(T, -32.0, -10.0)); + try std.testing.expectEqual(@as(T, 2.0), generic_fmod(T, 32.0, 10.0)); + try std.testing.expectEqual(@as(T, 2.0), generic_fmod(T, 32.0, -10.0)); + } +} + +test { + _ = @import("fmodq_test.zig"); + _ = @import("fmodx_test.zig"); +} diff --git a/lib/std/special/compiler_rt/fmodq.zig b/lib/std/special/compiler_rt/fmodq.zig deleted file mode 100644 index 3f56c4979609..000000000000 --- a/lib/std/special/compiler_rt/fmodq.zig +++ /dev/null @@ -1,126 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); - -// fmodq - floating modulo large, returns the remainder of division for f128 types -// Logic and flow heavily inspired by MUSL fmodl for 113 mantissa digits -pub fn fmodq(a: f128, b: f128) callconv(.C) f128 { - @setRuntimeSafety(builtin.is_test); - var amod = a; - var bmod = b; - const aPtr_u64 = @ptrCast([*]u64, &amod); - const bPtr_u64 = @ptrCast([*]u64, &bmod); - const aPtr_u16 = @ptrCast([*]u16, &amod); - const bPtr_u16 = @ptrCast([*]u16, &bmod); - - const exp_and_sign_index = comptime switch (builtin.target.cpu.arch.endian()) { - .Little => 7, - .Big => 0, - }; - const low_index = comptime switch (builtin.target.cpu.arch.endian()) { - .Little => 0, - .Big => 1, - }; - const high_index = comptime switch (builtin.target.cpu.arch.endian()) { - .Little => 1, - .Big => 0, - }; - - const signA = aPtr_u16[exp_and_sign_index] & 0x8000; - var expA = @intCast(i32, (aPtr_u16[exp_and_sign_index] & 0x7fff)); - var expB = @intCast(i32, (bPtr_u16[exp_and_sign_index] & 0x7fff)); - - // There are 3 cases where the answer is undefined, check for: - // - fmodq(val, 0) - // - fmodq(val, NaN) - // - fmodq(inf, val) - // The sign on checked values does not matter. 
- // Doing (a * b) / (a * b) procudes undefined results - // because the three cases always produce undefined calculations: - // - 0 / 0 - // - val * NaN - // - inf / inf - if (b == 0 or std.math.isNan(b) or expA == 0x7fff) { - return (a * b) / (a * b); - } - - // Remove the sign from both - aPtr_u16[exp_and_sign_index] = @bitCast(u16, @intCast(i16, expA)); - bPtr_u16[exp_and_sign_index] = @bitCast(u16, @intCast(i16, expB)); - if (amod <= bmod) { - if (amod == bmod) { - return 0 * a; - } - return a; - } - - if (expA == 0) { - amod *= 0x1p120; - expA = @as(i32, aPtr_u16[exp_and_sign_index]) - 120; - } - - if (expB == 0) { - bmod *= 0x1p120; - expB = @as(i32, bPtr_u16[exp_and_sign_index]) - 120; - } - - // OR in extra non-stored mantissa digit - var highA: u64 = (aPtr_u64[high_index] & (std.math.maxInt(u64) >> 16)) | 1 << 48; - var highB: u64 = (bPtr_u64[high_index] & (std.math.maxInt(u64) >> 16)) | 1 << 48; - var lowA: u64 = aPtr_u64[low_index]; - var lowB: u64 = bPtr_u64[low_index]; - - while (expA > expB) : (expA -= 1) { - var high = highA -% highB; - var low = lowA -% lowB; - if (lowA < lowB) { - high -%= 1; - } - if (high >> 63 == 0) { - if ((high | low) == 0) { - return 0 * a; - } - highA = 2 *% high + (low >> 63); - lowA = 2 *% low; - } else { - highA = 2 *% highA + (lowA >> 63); - lowA = 2 *% lowA; - } - } - - var high = highA -% highB; - var low = lowA -% lowB; - if (lowA < lowB) { - high -= 1; - } - if (high >> 63 == 0) { - if ((high | low) == 0) { - return 0 * a; - } - highA = high; - lowA = low; - } - - while (highA >> 48 == 0) { - highA = 2 *% highA + (lowA >> 63); - lowA = 2 *% lowA; - expA = expA - 1; - } - - // Overwrite the current amod with the values in highA and lowA - aPtr_u64[high_index] = highA; - aPtr_u64[low_index] = lowA; - - // Combine the exponent with the sign, normalize if happend to be denormalized - if (expA <= 0) { - aPtr_u16[exp_and_sign_index] = @truncate(u16, @bitCast(u32, (expA +% 120))) | signA; - amod *= 0x1p-120; - } else { - aPtr_u16[exp_and_sign_index] = @truncate(u16, @bitCast(u32, expA)) | signA; - } - - return amod; -} - -test { - _ = @import("fmodq_test.zig"); -} diff --git a/lib/std/special/compiler_rt/fmodq_test.zig b/lib/std/special/compiler_rt/fmodq_test.zig index b8baf8ae9be8..07ddb8d182e4 100644 --- a/lib/std/special/compiler_rt/fmodq_test.zig +++ b/lib/std/special/compiler_rt/fmodq_test.zig @@ -1,24 +1,24 @@ const std = @import("std"); -const fmodq = @import("fmodq.zig"); +const fmod = @import("fmod.zig"); const testing = std.testing; fn test_fmodq(a: f128, b: f128, exp: f128) !void { - const res = fmodq.fmodq(a, b); + const res = fmod.fmodq(a, b); try testing.expect(exp == res); } fn test_fmodq_nans() !void { - try testing.expect(std.math.isNan(fmodq.fmodq(1.0, std.math.nan(f128)))); - try testing.expect(std.math.isNan(fmodq.fmodq(1.0, -std.math.nan(f128)))); - try testing.expect(std.math.isNan(fmodq.fmodq(std.math.nan(f128), 1.0))); - try testing.expect(std.math.isNan(fmodq.fmodq(-std.math.nan(f128), 1.0))); + try testing.expect(std.math.isNan(fmod.fmodq(1.0, std.math.nan(f128)))); + try testing.expect(std.math.isNan(fmod.fmodq(1.0, -std.math.nan(f128)))); + try testing.expect(std.math.isNan(fmod.fmodq(std.math.nan(f128), 1.0))); + try testing.expect(std.math.isNan(fmod.fmodq(-std.math.nan(f128), 1.0))); } fn test_fmodq_infs() !void { - try testing.expect(fmodq.fmodq(1.0, std.math.inf(f128)) == 1.0); - try testing.expect(fmodq.fmodq(1.0, -std.math.inf(f128)) == 1.0); - try testing.expect(std.math.isNan(fmodq.fmodq(std.math.inf(f128), 
1.0))); - try testing.expect(std.math.isNan(fmodq.fmodq(-std.math.inf(f128), 1.0))); + try testing.expect(fmod.fmodq(1.0, std.math.inf(f128)) == 1.0); + try testing.expect(fmod.fmodq(1.0, -std.math.inf(f128)) == 1.0); + try testing.expect(std.math.isNan(fmod.fmodq(std.math.inf(f128), 1.0))); + try testing.expect(std.math.isNan(fmod.fmodq(-std.math.inf(f128), 1.0))); } test "fmodq" { diff --git a/lib/std/special/compiler_rt/fmodx.zig b/lib/std/special/compiler_rt/fmodx.zig deleted file mode 100644 index efe16f9f160d..000000000000 --- a/lib/std/special/compiler_rt/fmodx.zig +++ /dev/null @@ -1,108 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); -const math = std.math; -const normalize = @import("divdf3.zig").normalize; - -// fmodx - floating modulo large, returns the remainder of division for f80 types -// Logic and flow heavily inspired by MUSL fmodl for 113 mantissa digits -pub fn fmodx(a: f80, b: f80) callconv(.C) f80 { - @setRuntimeSafety(builtin.is_test); - - const T = f80; - const Z = std.meta.Int(.unsigned, @bitSizeOf(T)); - - const significandBits = math.floatMantissaBits(T); - const fractionalBits = math.floatFractionalBits(T); - const exponentBits = math.floatExponentBits(T); - - const signBit = (@as(Z, 1) << (significandBits + exponentBits)); - const maxExponent = ((1 << exponentBits) - 1); - - var aRep = @bitCast(Z, a); - var bRep = @bitCast(Z, b); - - const signA = aRep & signBit; - var expA = @intCast(i32, (@bitCast(Z, a) >> significandBits) & maxExponent); - var expB = @intCast(i32, (@bitCast(Z, b) >> significandBits) & maxExponent); - - // There are 3 cases where the answer is undefined, check for: - // - fmodx(val, 0) - // - fmodx(val, NaN) - // - fmodx(inf, val) - // The sign on checked values does not matter. 
- // Doing (a * b) / (a * b) procudes undefined results - // because the three cases always produce undefined calculations: - // - 0 / 0 - // - val * NaN - // - inf / inf - if (b == 0 or math.isNan(b) or expA == maxExponent) { - return (a * b) / (a * b); - } - - // Remove the sign from both - aRep &= ~signBit; - bRep &= ~signBit; - if (aRep <= bRep) { - if (aRep == bRep) { - return 0 * a; - } - return a; - } - - if (expA == 0) expA = normalize(f80, &aRep); - if (expB == 0) expB = normalize(f80, &bRep); - - var highA: u64 = 0; - var highB: u64 = 0; - var lowA: u64 = @truncate(u64, aRep); - var lowB: u64 = @truncate(u64, bRep); - - while (expA > expB) : (expA -= 1) { - var high = highA -% highB; - var low = lowA -% lowB; - if (lowA < lowB) { - high -%= 1; - } - if (high >> 63 == 0) { - if ((high | low) == 0) { - return 0 * a; - } - highA = 2 *% high + (low >> 63); - lowA = 2 *% low; - } else { - highA = 2 *% highA + (lowA >> 63); - lowA = 2 *% lowA; - } - } - - var high = highA -% highB; - var low = lowA -% lowB; - if (lowA < lowB) { - high -%= 1; - } - if (high >> 63 == 0) { - if ((high | low) == 0) { - return 0 * a; - } - highA = high; - lowA = low; - } - - while ((lowA >> fractionalBits) == 0) { - lowA = 2 *% lowA; - expA = expA - 1; - } - - // Combine the exponent with the sign and significand, normalize if happened to be denormalized - if (expA < -fractionalBits) { - return @bitCast(T, signA); - } else if (expA <= 0) { - return @bitCast(T, (lowA >> @intCast(math.Log2Int(u64), 1 - expA)) | signA); - } else { - return @bitCast(T, lowA | (@as(Z, @intCast(u16, expA)) << significandBits) | signA); - } -} - -test { - _ = @import("fmodx_test.zig"); -} diff --git a/lib/std/special/compiler_rt/fmodx_test.zig b/lib/std/special/compiler_rt/fmodx_test.zig index a5d0887ea4a1..4bb1b5654a20 100644 --- a/lib/std/special/compiler_rt/fmodx_test.zig +++ b/lib/std/special/compiler_rt/fmodx_test.zig @@ -1,24 +1,24 @@ const std = @import("std"); -const fmodx = @import("fmodx.zig"); +const fmod = @import("fmod.zig"); const testing = std.testing; fn test_fmodx(a: f80, b: f80, exp: f80) !void { - const res = fmodx.fmodx(a, b); + const res = fmod.__fmodx(a, b); try testing.expect(exp == res); } fn test_fmodx_nans() !void { - try testing.expect(std.math.isNan(fmodx.fmodx(1.0, std.math.nan(f80)))); - try testing.expect(std.math.isNan(fmodx.fmodx(1.0, -std.math.nan(f80)))); - try testing.expect(std.math.isNan(fmodx.fmodx(std.math.nan(f80), 1.0))); - try testing.expect(std.math.isNan(fmodx.fmodx(-std.math.nan(f80), 1.0))); + try testing.expect(std.math.isNan(fmod.__fmodx(1.0, std.math.nan(f80)))); + try testing.expect(std.math.isNan(fmod.__fmodx(1.0, -std.math.nan(f80)))); + try testing.expect(std.math.isNan(fmod.__fmodx(std.math.nan(f80), 1.0))); + try testing.expect(std.math.isNan(fmod.__fmodx(-std.math.nan(f80), 1.0))); } fn test_fmodx_infs() !void { - try testing.expect(fmodx.fmodx(1.0, std.math.inf(f80)) == 1.0); - try testing.expect(fmodx.fmodx(1.0, -std.math.inf(f80)) == 1.0); - try testing.expect(std.math.isNan(fmodx.fmodx(std.math.inf(f80), 1.0))); - try testing.expect(std.math.isNan(fmodx.fmodx(-std.math.inf(f80), 1.0))); + try testing.expect(fmod.__fmodx(1.0, std.math.inf(f80)) == 1.0); + try testing.expect(fmod.__fmodx(1.0, -std.math.inf(f80)) == 1.0); + try testing.expect(std.math.isNan(fmod.__fmodx(std.math.inf(f80), 1.0))); + try testing.expect(std.math.isNan(fmod.__fmodx(-std.math.inf(f80), 1.0))); } test "fmodx" { diff --git a/lib/std/special/compiler_rt/log.zig b/lib/std/special/compiler_rt/log.zig 
new file mode 100644 index 000000000000..8b09baac2ef6 --- /dev/null +++ b/lib/std/special/compiler_rt/log.zig @@ -0,0 +1,168 @@ +// Ported from musl, which is licensed under the MIT license:
+// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
+//
+// https://git.musl-libc.org/cgit/musl/tree/src/math/logf.c
+// https://git.musl-libc.org/cgit/musl/tree/src/math/log.c
+
+const std = @import("std");
+const math = std.math;
+const testing = std.testing;
+
+pub fn __logh(a: f16) callconv(.C) f16 {
+    // TODO: more efficient implementation
+    return @floatCast(f16, logf(a));
+}
+
+pub fn logf(x_: f32) callconv(.C) f32 {
+    const ln2_hi: f32 = 6.9313812256e-01;
+    const ln2_lo: f32 = 9.0580006145e-06;
+    const Lg1: f32 = 0xaaaaaa.0p-24;
+    const Lg2: f32 = 0xccce13.0p-25;
+    const Lg3: f32 = 0x91e9ee.0p-25;
+    const Lg4: f32 = 0xf89e26.0p-26;
+
+    var x = x_;
+    var ix = @bitCast(u32, x);
+    var k: i32 = 0;
+
+    // x < 2^(-126)
+    if (ix < 0x00800000 or ix >> 31 != 0) {
+        // log(+-0) = -inf
+        if (ix << 1 == 0) {
+            return -math.inf(f32);
+        }
+        // log(-#) = nan
+        if (ix >> 31 != 0) {
+            return math.nan(f32);
+        }
+
+        // subnormal, scale x
+        k -= 25;
+        x *= 0x1.0p25;
+        ix = @bitCast(u32, x);
+    } else if (ix >= 0x7F800000) {
+        return x;
+    } else if (ix == 0x3F800000) {
+        return 0;
+    }
+
+    // x into [sqrt(2) / 2, sqrt(2)]
+    ix += 0x3F800000 - 0x3F3504F3;
+    k += @intCast(i32, ix >> 23) - 0x7F;
+    ix = (ix & 0x007FFFFF) + 0x3F3504F3;
+    x = @bitCast(f32, ix);
+
+    const f = x - 1.0;
+    const s = f / (2.0 + f);
+    const z = s * s;
+    const w = z * z;
+    const t1 = w * (Lg2 + w * Lg4);
+    const t2 = z * (Lg1 + w * Lg3);
+    const R = t2 + t1;
+    const hfsq = 0.5 * f * f;
+    const dk = @intToFloat(f32, k);
+
+    return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi;
+}
+
+pub fn log(x_: f64) callconv(.C) f64 {
+    const ln2_hi: f64 = 6.93147180369123816490e-01;
+    const ln2_lo: f64 = 1.90821492927058770002e-10;
+    const Lg1: f64 = 6.666666666666735130e-01;
+    const Lg2: f64 = 3.999999999940941908e-01;
+    const Lg3: f64 = 2.857142874366239149e-01;
+    const Lg4: f64 = 2.222219843214978396e-01;
+    const Lg5: f64 = 1.818357216161805012e-01;
+    const Lg6: f64 = 1.531383769920937332e-01;
+    const Lg7: f64 = 1.479819860511658591e-01;
+
+    var x = x_;
+    var ix = @bitCast(u64, x);
+    var hx = @intCast(u32, ix >> 32);
+    var k: i32 = 0;
+
+    if (hx < 0x00100000 or hx >> 31 != 0) {
+        // log(+-0) = -inf
+        if (ix << 1 == 0) {
+            return -math.inf(f64);
+        }
+        // log(-#) = nan
+        if (hx >> 31 != 0) {
+            return math.nan(f64);
+        }
+
+        // subnormal, scale x
+        k -= 54;
+        x *= 0x1.0p54;
+        hx = @intCast(u32, @bitCast(u64, x) >> 32);
+    } else if (hx >= 0x7FF00000) {
+        return x;
+    } else if (hx == 0x3FF00000 and ix << 32 == 0) {
+        return 0;
+    }
+
+    // x into [sqrt(2) / 2, sqrt(2)]
+    hx += 0x3FF00000 - 0x3FE6A09E;
+    k += @intCast(i32, hx >> 20) - 0x3FF;
+    hx = (hx & 0x000FFFFF) + 0x3FE6A09E;
+    ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF);
+    x = @bitCast(f64, ix);
+
+    const f = x - 1.0;
+    const hfsq = 0.5 * f * f;
+    const s = f / (2.0 + f);
+    const z = s * s;
+    const w = z * z;
+    const t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
+    const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
+    const R = t2 + t1;
+    const dk = @intToFloat(f64, k);
+
+    return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi;
+}
+
+pub fn __logx(a: f80) callconv(.C) f80 {
+    // TODO: more efficient implementation
+    return @floatCast(f80, logq(a));
+}
+
+pub fn logq(a: f128) callconv(.C) f128 {
+    // TODO: more correct implementation
+    return log(@floatCast(f64, a));
+}
+
+test "ln32" {
+    const
epsilon = 0.000001; + + try testing.expect(math.approxEqAbs(f32, logf(0.2), -1.609438, epsilon)); + try testing.expect(math.approxEqAbs(f32, logf(0.8923), -0.113953, epsilon)); + try testing.expect(math.approxEqAbs(f32, logf(1.5), 0.405465, epsilon)); + try testing.expect(math.approxEqAbs(f32, logf(37.45), 3.623007, epsilon)); + try testing.expect(math.approxEqAbs(f32, logf(89.123), 4.490017, epsilon)); + try testing.expect(math.approxEqAbs(f32, logf(123123.234375), 11.720941, epsilon)); +} + +test "ln64" { + const epsilon = 0.000001; + + try testing.expect(math.approxEqAbs(f64, log(0.2), -1.609438, epsilon)); + try testing.expect(math.approxEqAbs(f64, log(0.8923), -0.113953, epsilon)); + try testing.expect(math.approxEqAbs(f64, log(1.5), 0.405465, epsilon)); + try testing.expect(math.approxEqAbs(f64, log(37.45), 3.623007, epsilon)); + try testing.expect(math.approxEqAbs(f64, log(89.123), 4.490017, epsilon)); + try testing.expect(math.approxEqAbs(f64, log(123123.234375), 11.720941, epsilon)); +} + +test "ln32.special" { + try testing.expect(math.isPositiveInf(logf(math.inf(f32)))); + try testing.expect(math.isNegativeInf(logf(0.0))); + try testing.expect(math.isNan(logf(-1.0))); + try testing.expect(math.isNan(logf(math.nan(f32)))); +} + +test "ln64.special" { + try testing.expect(math.isPositiveInf(log(math.inf(f64)))); + try testing.expect(math.isNegativeInf(log(0.0))); + try testing.expect(math.isNan(log(-1.0))); + try testing.expect(math.isNan(log(math.nan(f64)))); +} diff --git a/lib/std/special/compiler_rt/log10.zig b/lib/std/special/compiler_rt/log10.zig new file mode 100644 index 000000000000..ce06d8c6493b --- /dev/null +++ b/lib/std/special/compiler_rt/log10.zig @@ -0,0 +1,196 @@ +// Ported from musl, which is licensed under the MIT license: +// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT +// +// https://git.musl-libc.org/cgit/musl/tree/src/math/log10f.c +// https://git.musl-libc.org/cgit/musl/tree/src/math/log10.c + +const std = @import("std"); +const math = std.math; +const testing = std.testing; +const maxInt = std.math.maxInt; + +pub fn __log10h(a: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, log10f(a)); +} + +pub fn log10f(x_: f32) callconv(.C) f32 { + const ivln10hi: f32 = 4.3432617188e-01; + const ivln10lo: f32 = -3.1689971365e-05; + const log10_2hi: f32 = 3.0102920532e-01; + const log10_2lo: f32 = 7.9034151668e-07; + const Lg1: f32 = 0xaaaaaa.0p-24; + const Lg2: f32 = 0xccce13.0p-25; + const Lg3: f32 = 0x91e9ee.0p-25; + const Lg4: f32 = 0xf89e26.0p-26; + + var x = x_; + var u = @bitCast(u32, x); + var ix = u; + var k: i32 = 0; + + // x < 2^(-126) + if (ix < 0x00800000 or ix >> 31 != 0) { + // log(+-0) = -inf + if (ix << 1 == 0) { + return -math.inf(f32); + } + // log(-#) = nan + if (ix >> 31 != 0) { + return math.nan(f32); + } + + k -= 25; + x *= 0x1.0p25; + ix = @bitCast(u32, x); + } else if (ix >= 0x7F800000) { + return x; + } else if (ix == 0x3F800000) { + return 0; + } + + // x into [sqrt(2) / 2, sqrt(2)] + ix += 0x3F800000 - 0x3F3504F3; + k += @intCast(i32, ix >> 23) - 0x7F; + ix = (ix & 0x007FFFFF) + 0x3F3504F3; + x = @bitCast(f32, ix); + + const f = x - 1.0; + const s = f / (2.0 + f); + const z = s * s; + const w = z * z; + const t1 = w * (Lg2 + w * Lg4); + const t2 = z * (Lg1 + w * Lg3); + const R = t2 + t1; + const hfsq = 0.5 * f * f; + + var hi = f - hfsq; + u = @bitCast(u32, hi); + u &= 0xFFFFF000; + hi = @bitCast(f32, u); + const lo = f - hi - hfsq + s * (hfsq + R); + const dk = @intToFloat(f32, k); + + return 
dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
+}
+
+pub fn log10(x_: f64) callconv(.C) f64 {
+    const ivln10hi: f64 = 4.34294481878168880939e-01;
+    const ivln10lo: f64 = 2.50829467116452752298e-11;
+    const log10_2hi: f64 = 3.01029995663611771306e-01;
+    const log10_2lo: f64 = 3.69423907715893078616e-13;
+    const Lg1: f64 = 6.666666666666735130e-01;
+    const Lg2: f64 = 3.999999999940941908e-01;
+    const Lg3: f64 = 2.857142874366239149e-01;
+    const Lg4: f64 = 2.222219843214978396e-01;
+    const Lg5: f64 = 1.818357216161805012e-01;
+    const Lg6: f64 = 1.531383769920937332e-01;
+    const Lg7: f64 = 1.479819860511658591e-01;
+
+    var x = x_;
+    var ix = @bitCast(u64, x);
+    var hx = @intCast(u32, ix >> 32);
+    var k: i32 = 0;
+
+    if (hx < 0x00100000 or hx >> 31 != 0) {
+        // log(+-0) = -inf
+        if (ix << 1 == 0) {
+            return -math.inf(f64);
+        }
+        // log(-#) = nan
+        if (hx >> 31 != 0) {
+            return math.nan(f64);
+        }
+
+        // subnormal, scale x
+        k -= 54;
+        x *= 0x1.0p54;
+        hx = @intCast(u32, @bitCast(u64, x) >> 32);
+    } else if (hx >= 0x7FF00000) {
+        return x;
+    } else if (hx == 0x3FF00000 and ix << 32 == 0) {
+        return 0;
+    }
+
+    // x into [sqrt(2) / 2, sqrt(2)]
+    hx += 0x3FF00000 - 0x3FE6A09E;
+    k += @intCast(i32, hx >> 20) - 0x3FF;
+    hx = (hx & 0x000FFFFF) + 0x3FE6A09E;
+    ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF);
+    x = @bitCast(f64, ix);
+
+    const f = x - 1.0;
+    const hfsq = 0.5 * f * f;
+    const s = f / (2.0 + f);
+    const z = s * s;
+    const w = z * z;
+    const t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
+    const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
+    const R = t2 + t1;
+
+    // hi + lo = f - hfsq + s * (hfsq + R) ~ log(1 + f)
+    var hi = f - hfsq;
+    var hii = @bitCast(u64, hi);
+    hii &= @as(u64, maxInt(u64)) << 32;
+    hi = @bitCast(f64, hii);
+    const lo = f - hi - hfsq + s * (hfsq + R);
+
+    // val_hi + val_lo ~ log10(1 + f) + k * log10(2)
+    var val_hi = hi * ivln10hi;
+    const dk = @intToFloat(f64, k);
+    const y = dk * log10_2hi;
+    var val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
+
+    // Extra precision multiplication
+    const ww = y + val_hi;
+    val_lo += (y - ww) + val_hi;
+    val_hi = ww;
+
+    return val_lo + val_hi;
+}
+
+pub fn __log10x(a: f80) callconv(.C) f80 {
+    // TODO: more efficient implementation
+    return @floatCast(f80, log10q(a));
+}
+
+pub fn log10q(a: f128) callconv(.C) f128 {
+    // TODO: more correct implementation
+    return log10(@floatCast(f64, a));
+}
+
+test "log10_32" {
+    const epsilon = 0.000001;
+
+    try testing.expect(math.approxEqAbs(f32, log10f(0.2), -0.698970, epsilon));
+    try testing.expect(math.approxEqAbs(f32, log10f(0.8923), -0.049489, epsilon));
+    try testing.expect(math.approxEqAbs(f32, log10f(1.5), 0.176091, epsilon));
+    try testing.expect(math.approxEqAbs(f32, log10f(37.45), 1.573452, epsilon));
+    try testing.expect(math.approxEqAbs(f32, log10f(89.123), 1.94999, epsilon));
+    try testing.expect(math.approxEqAbs(f32, log10f(123123.234375), 5.09034, epsilon));
+}
+
+test "log10_64" {
+    const epsilon = 0.000001;
+
+    try testing.expect(math.approxEqAbs(f64, log10(0.2), -0.698970, epsilon));
+    try testing.expect(math.approxEqAbs(f64, log10(0.8923), -0.049489, epsilon));
+    try testing.expect(math.approxEqAbs(f64, log10(1.5), 0.176091, epsilon));
+    try testing.expect(math.approxEqAbs(f64, log10(37.45), 1.573452, epsilon));
+    try testing.expect(math.approxEqAbs(f64, log10(89.123), 1.94999, epsilon));
+    try testing.expect(math.approxEqAbs(f64, log10(123123.234375), 5.09034, epsilon));
+}
+
+test "log10_32.special" {
+    try
testing.expect(math.isPositiveInf(log10f(math.inf(f32)))); + try testing.expect(math.isNegativeInf(log10f(0.0))); + try testing.expect(math.isNan(log10f(-1.0))); + try testing.expect(math.isNan(log10f(math.nan(f32)))); +} + +test "log10_64.special" { + try testing.expect(math.isPositiveInf(log10(math.inf(f64)))); + try testing.expect(math.isNegativeInf(log10(0.0))); + try testing.expect(math.isNan(log10(-1.0))); + try testing.expect(math.isNan(log10(math.nan(f64)))); +} diff --git a/lib/std/special/compiler_rt/log2.zig b/lib/std/special/compiler_rt/log2.zig new file mode 100644 index 000000000000..2c2d620c3daf --- /dev/null +++ b/lib/std/special/compiler_rt/log2.zig @@ -0,0 +1,185 @@ +// Ported from musl, which is licensed under the MIT license: +// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT +// +// https://git.musl-libc.org/cgit/musl/tree/src/math/log2f.c +// https://git.musl-libc.org/cgit/musl/tree/src/math/log2.c + +const std = @import("std"); +const math = std.math; +const expect = std.testing.expect; +const maxInt = std.math.maxInt; + +pub fn __log2h(a: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, log2f(a)); +} + +pub fn log2f(x_: f32) callconv(.C) f32 { + const ivln2hi: f32 = 1.4428710938e+00; + const ivln2lo: f32 = -1.7605285393e-04; + const Lg1: f32 = 0xaaaaaa.0p-24; + const Lg2: f32 = 0xccce13.0p-25; + const Lg3: f32 = 0x91e9ee.0p-25; + const Lg4: f32 = 0xf89e26.0p-26; + + var x = x_; + var u = @bitCast(u32, x); + var ix = u; + var k: i32 = 0; + + // x < 2^(-126) + if (ix < 0x00800000 or ix >> 31 != 0) { + // log(+-0) = -inf + if (ix << 1 == 0) { + return -math.inf(f32); + } + // log(-#) = nan + if (ix >> 31 != 0) { + return math.nan(f32); + } + + k -= 25; + x *= 0x1.0p25; + ix = @bitCast(u32, x); + } else if (ix >= 0x7F800000) { + return x; + } else if (ix == 0x3F800000) { + return 0; + } + + // x into [sqrt(2) / 2, sqrt(2)] + ix += 0x3F800000 - 0x3F3504F3; + k += @intCast(i32, ix >> 23) - 0x7F; + ix = (ix & 0x007FFFFF) + 0x3F3504F3; + x = @bitCast(f32, ix); + + const f = x - 1.0; + const s = f / (2.0 + f); + const z = s * s; + const w = z * z; + const t1 = w * (Lg2 + w * Lg4); + const t2 = z * (Lg1 + w * Lg3); + const R = t2 + t1; + const hfsq = 0.5 * f * f; + + var hi = f - hfsq; + u = @bitCast(u32, hi); + u &= 0xFFFFF000; + hi = @bitCast(f32, u); + const lo = f - hi - hfsq + s * (hfsq + R); + return (lo + hi) * ivln2lo + lo * ivln2hi + hi * ivln2hi + @intToFloat(f32, k); +} + +pub fn log2(x_: f64) callconv(.C) f64 { + const ivln2hi: f64 = 1.44269504072144627571e+00; + const ivln2lo: f64 = 1.67517131648865118353e-10; + const Lg1: f64 = 6.666666666666735130e-01; + const Lg2: f64 = 3.999999999940941908e-01; + const Lg3: f64 = 2.857142874366239149e-01; + const Lg4: f64 = 2.222219843214978396e-01; + const Lg5: f64 = 1.818357216161805012e-01; + const Lg6: f64 = 1.531383769920937332e-01; + const Lg7: f64 = 1.479819860511658591e-01; + + var x = x_; + var ix = @bitCast(u64, x); + var hx = @intCast(u32, ix >> 32); + var k: i32 = 0; + + if (hx < 0x00100000 or hx >> 31 != 0) { + // log(+-0) = -inf + if (ix << 1 == 0) { + return -math.inf(f64); + } + // log(-#) = nan + if (hx >> 31 != 0) { + return math.nan(f64); + } + + // subnormal, scale x + k -= 54; + x *= 0x1.0p54; + hx = @intCast(u32, @bitCast(u64, x) >> 32); + } else if (hx >= 0x7FF00000) { + return x; + } else if (hx == 0x3FF00000 and ix << 32 == 0) { + return 0; + } + + // x into [sqrt(2) / 2, sqrt(2)] + hx += 0x3FF00000 - 0x3FE6A09E; + k += @intCast(i32, hx >> 20) - 0x3FF; + hx 
= (hx & 0x000FFFFF) + 0x3FE6A09E; + ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF); + x = @bitCast(f64, ix); + + const f = x - 1.0; + const hfsq = 0.5 * f * f; + const s = f / (2.0 + f); + const z = s * s; + const w = z * z; + const t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); + const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); + const R = t2 + t1; + + // hi + lo = f - hfsq + s * (hfsq + R) ~ log(1 + f) + var hi = f - hfsq; + var hii = @bitCast(u64, hi); + hii &= @as(u64, maxInt(u64)) << 32; + hi = @bitCast(f64, hii); + const lo = f - hi - hfsq + s * (hfsq + R); + + var val_hi = hi * ivln2hi; + var val_lo = (lo + hi) * ivln2lo + lo * ivln2hi; + + // spadd(val_hi, val_lo, y) + const y = @intToFloat(f64, k); + const ww = y + val_hi; + val_lo += (y - ww) + val_hi; + val_hi = ww; + + return val_lo + val_hi; +} + +pub fn __log2x(a: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, log2q(a)); +} + +pub fn log2q(a: f128) callconv(.C) f128 { + return math.log2(a); +} + +test "log2_32" { + const epsilon = 0.000001; + + try expect(math.approxEqAbs(f32, log2f(0.2), -2.321928, epsilon)); + try expect(math.approxEqAbs(f32, log2f(0.8923), -0.164399, epsilon)); + try expect(math.approxEqAbs(f32, log2f(1.5), 0.584962, epsilon)); + try expect(math.approxEqAbs(f32, log2f(37.45), 5.226894, epsilon)); + try expect(math.approxEqAbs(f32, log2f(123123.234375), 16.909744, epsilon)); +} + +test "log2_64" { + const epsilon = 0.000001; + + try expect(math.approxEqAbs(f64, log2(0.2), -2.321928, epsilon)); + try expect(math.approxEqAbs(f64, log2(0.8923), -0.164399, epsilon)); + try expect(math.approxEqAbs(f64, log2(1.5), 0.584962, epsilon)); + try expect(math.approxEqAbs(f64, log2(37.45), 5.226894, epsilon)); + try expect(math.approxEqAbs(f64, log2(123123.234375), 16.909744, epsilon)); +} + +test "log2_32.special" { + try expect(math.isPositiveInf(log2f(math.inf(f32)))); + try expect(math.isNegativeInf(log2f(0.0))); + try expect(math.isNan(log2f(-1.0))); + try expect(math.isNan(log2f(math.nan(f32)))); +} + +test "log2_64.special" { + try expect(math.isPositiveInf(log2(math.inf(f64)))); + try expect(math.isNegativeInf(log2(0.0))); + try expect(math.isNan(log2(-1.0))); + try expect(math.isNan(log2(math.nan(f64)))); +} diff --git a/lib/std/math/__rem_pio2.zig b/lib/std/special/compiler_rt/rem_pio2.zig similarity index 95% rename from lib/std/math/__rem_pio2.zig rename to lib/std/special/compiler_rt/rem_pio2.zig index f01d8fe94a3e..73d477ee12b3 100644 --- a/lib/std/math/__rem_pio2.zig +++ b/lib/std/special/compiler_rt/rem_pio2.zig @@ -3,8 +3,8 @@ // // https://git.musl-libc.org/cgit/musl/tree/src/math/__rem_pio2.c -const std = @import("../std.zig"); -const __rem_pio2_large = @import("__rem_pio2_large.zig").__rem_pio2_large; +const std = @import("std"); +const rem_pio2_large = @import("rem_pio2_large.zig").rem_pio2_large; const math = std.math; const toint = 1.5 / math.floatEps(f64); @@ -82,10 +82,10 @@ fn medium(ix: u32, x: f64, y: *[2]f64) i32 { // Returns the remainder of x rem pi/2 in y[0]+y[1] // -// use __rem_pio2_large() for large x +// use rem_pio2_large() for large x // // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ -pub fn __rem_pio2(x: f64, y: *[2]f64) i32 { +pub fn rem_pio2(x: f64, y: *[2]f64) i32 { var z: f64 = undefined; var tx: [3]f64 = undefined; var ty: [2]f64 = undefined; @@ -186,7 +186,7 @@ pub fn __rem_pio2(x: f64, y: *[2]f64) i32 { while (tx[U(i)] == 0.0) { i -= 1; } - n = __rem_pio2_large(tx[0..], ty[0..], @intCast(i32, (ix >> 20)) - 
(0x3ff + 23), i + 1, 1); + n = rem_pio2_large(tx[0..], ty[0..], @intCast(i32, (ix >> 20)) - (0x3ff + 23), i + 1, 1); if (sign) { y[0] = -ty[0]; y[1] = -ty[1]; diff --git a/lib/std/math/__rem_pio2_large.zig b/lib/std/special/compiler_rt/rem_pio2_large.zig similarity index 75% rename from lib/std/math/__rem_pio2_large.zig rename to lib/std/special/compiler_rt/rem_pio2_large.zig index 140e85f7f6f0..c8a53b741c07 100644 --- a/lib/std/math/__rem_pio2_large.zig +++ b/lib/std/special/compiler_rt/rem_pio2_large.zig @@ -3,23 +3,22 @@ // // https://git.musl-libc.org/cgit/musl/tree/src/math/__rem_pio2_large.c -const std = @import("../std.zig"); +const std = @import("std"); const math = std.math; const init_jk = [_]i32{ 3, 4, 4, 6 }; // initial value for jk -// -// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi -// -// integer array, contains the (24*i)-th to (24*i+23)-th -// bit of 2/pi after binary point. The corresponding -// floating value is -// -// ipio2[i] * 2^(-24(i+1)). -// -// NB: This table must have at least (e0-3)/24 + jk terms. -// For quad precision (e0 <= 16360, jk = 6), this is 686. /// +/// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi +/// +/// integer array, contains the (24*i)-th to (24*i+23)-th +/// bit of 2/pi after binary point. The corresponding +/// floating value is +/// +/// ipio2[i] * 2^(-24(i+1)). +/// +/// NB: This table must have at least (e0-3)/24 + jk terms. +/// For quad precision (e0 <= 16360, jk = 6), this is 686. const ipio2 = [_]i32{ 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, @@ -33,7 +32,6 @@ const ipio2 = [_]i32{ 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, - //#if LDBL_MAX_EXP > 1024 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, @@ -137,9 +135,7 @@ const ipio2 = [_]i32{ 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, - 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, - 0x8071E0, - //#endif + 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, }; const PIo2 = [_]f64{ @@ -157,109 +153,109 @@ fn U(x: anytype) usize { return @intCast(usize, x); } -// Returns the last three digits of N with y = x - N*pi/2 so that |y| < pi/2. -// -// The method is to compute the integer (mod 8) and fraction parts of -// (2/pi)*x without doing the full multiplication. In general we -// skip the part of the product that are known to be a huge integer ( -// more accurately, = 0 mod 8 ). Thus the number of operations are -// independent of the exponent of the input. -// -// (2/pi) is represented by an array of 24-bit integers in ipio2[]. -// -// Input parameters: -// x[] The input value (must be positive) is broken into nx -// pieces of 24-bit integers in double precision format. -// x[i] will be the i-th 24 bit of x. The scaled exponent -// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 -// match x's up to 24 bits. -// -// Example of breaking a double positive z into x[0]+x[1]+x[2]: -// e0 = ilogb(z)-23 -// z = scalbn(z,-e0) -// for i = 0,1,2 -// x[i] = floor(z) -// z = (z-x[i])*2**24 -// -// -// y[] ouput result in an array of double precision numbers. 
-// The dimension of y[] is: -// 24-bit precision 1 -// 53-bit precision 2 -// 64-bit precision 2 -// 113-bit precision 3 -// The actual value is the sum of them. Thus for 113-bit -// precison, one may have to do something like: -// -// long double t,w,r_head, r_tail; -// t = (long double)y[2] + (long double)y[1]; -// w = (long double)y[0]; -// r_head = t+w; -// r_tail = w - (r_head - t); -// -// e0 The exponent of x[0]. Must be <= 16360 or you need to -// expand the ipio2 table. -// -// nx dimension of x[] -// -// prec an integer indicating the precision: -// 0 24 bits (single) -// 1 53 bits (double) -// 2 64 bits (extended) -// 3 113 bits (quad) -// -// Here is the description of some local variables: -// -// jk jk+1 is the initial number of terms of ipio2[] needed -// in the computation. The minimum and recommended value -// for jk is 3,4,4,6 for single, double, extended, and quad. -// jk+1 must be 2 larger than you might expect so that our -// recomputation test works. (Up to 24 bits in the integer -// part (the 24 bits of it that we compute) and 23 bits in -// the fraction part may be lost to cancelation before we -// recompute.) -// -// jz local integer variable indicating the number of -// terms of ipio2[] used. -// -// jx nx - 1 -// -// jv index for pointing to the suitable ipio2[] for the -// computation. In general, we want -// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 -// is an integer. Thus -// e0-3-24*jv >= 0 or (e0-3)/24 >= jv -// Hence jv = max(0,(e0-3)/24). -// -// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. -// -// q[] double array with integral value, representing the -// 24-bits chunk of the product of x and 2/pi. -// -// q0 the corresponding exponent of q[0]. Note that the -// exponent for q[i] would be q0-24*i. -// -// PIo2[] double precision array, obtained by cutting pi/2 -// into 24 bits chunks. -// -// f[] ipio2[] in floating point -// -// iq[] integer array by breaking up q[] in 24-bits chunk. -// -// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] -// -// ih integer. If >0 it indicates q[] is >= 0.5, hence -// it also indicates the *sign* of the result. -// +/// Returns the last three digits of N with y = x - N*pi/2 so that |y| < pi/2. /// -// -// Constants: -// The hexadecimal values are the intended ones for the following -// constants. The decimal values may be used, provided that the -// compiler will convert from decimal to binary accurately enough -// to produce the hexadecimal values shown. +/// The method is to compute the integer (mod 8) and fraction parts of +/// (2/pi)*x without doing the full multiplication. In general we +/// skip the part of the product that are known to be a huge integer ( +/// more accurately, = 0 mod 8 ). Thus the number of operations are +/// independent of the exponent of the input. +/// +/// (2/pi) is represented by an array of 24-bit integers in ipio2[]. +/// +/// Input parameters: +/// x[] The input value (must be positive) is broken into nx +/// pieces of 24-bit integers in double precision format. +/// x[i] will be the i-th 24 bit of x. The scaled exponent +/// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 +/// match x's up to 24 bits. +/// +/// Example of breaking a double positive z into x[0]+x[1]+x[2]: +/// e0 = ilogb(z)-23 +/// z = scalbn(z,-e0) +/// for i = 0,1,2 +/// x[i] = floor(z) +/// z = (z-x[i])*2**24 +/// +/// +/// y[] ouput result in an array of double precision numbers. 
+/// The dimension of y[] is: +/// 24-bit precision 1 +/// 53-bit precision 2 +/// 64-bit precision 2 +/// 113-bit precision 3 +/// The actual value is the sum of them. Thus for 113-bit +/// precison, one may have to do something like: +/// +/// long double t,w,r_head, r_tail; +/// t = (long double)y[2] + (long double)y[1]; +/// w = (long double)y[0]; +/// r_head = t+w; +/// r_tail = w - (r_head - t); +/// +/// e0 The exponent of x[0]. Must be <= 16360 or you need to +/// expand the ipio2 table. +/// +/// nx dimension of x[] +/// +/// prec an integer indicating the precision: +/// 0 24 bits (single) +/// 1 53 bits (double) +/// 2 64 bits (extended) +/// 3 113 bits (quad) +/// +/// Here is the description of some local variables: +/// +/// jk jk+1 is the initial number of terms of ipio2[] needed +/// in the computation. The minimum and recommended value +/// for jk is 3,4,4,6 for single, double, extended, and quad. +/// jk+1 must be 2 larger than you might expect so that our +/// recomputation test works. (Up to 24 bits in the integer +/// part (the 24 bits of it that we compute) and 23 bits in +/// the fraction part may be lost to cancelation before we +/// recompute.) +/// +/// jz local integer variable indicating the number of +/// terms of ipio2[] used. +/// +/// jx nx - 1 +/// +/// jv index for pointing to the suitable ipio2[] for the +/// computation. In general, we want +/// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 +/// is an integer. Thus +/// e0-3-24*jv >= 0 or (e0-3)/24 >= jv +/// Hence jv = max(0,(e0-3)/24). +/// +/// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. +/// +/// q[] double array with integral value, representing the +/// 24-bits chunk of the product of x and 2/pi. +/// +/// q0 the corresponding exponent of q[0]. Note that the +/// exponent for q[i] would be q0-24*i. +/// +/// PIo2[] double precision array, obtained by cutting pi/2 +/// into 24 bits chunks. +/// +/// f[] ipio2[] in floating point +/// +/// iq[] integer array by breaking up q[] in 24-bits chunk. +/// +/// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] +/// +/// ih integer. If >0 it indicates q[] is >= 0.5, hence +/// it also indicates the *sign* of the result. +/// +/// +/// +/// Constants: +/// The hexadecimal values are the intended ones for the following +/// constants. The decimal values may be used, provided that the +/// compiler will convert from decimal to binary accurately enough +/// to produce the hexadecimal values shown. 
/// -pub fn __rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 { +pub fn rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 { var jz: i32 = undefined; var jx: i32 = undefined; var jv: i32 = undefined; @@ -333,7 +329,7 @@ pub fn __rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 { // compute n z = math.scalbn(z, q0); // actual value of z - z -= 8.0 * math.floor(z * 0.125); // trim off integer >= 8 + z -= 8.0 * @floor(z * 0.125); // trim off integer >= 8 n = @floatToInt(i32, z); z -= @intToFloat(f64, n); ih = 0; diff --git a/lib/std/math/__rem_pio2f.zig b/lib/std/special/compiler_rt/rem_pio2f.zig similarity index 88% rename from lib/std/math/__rem_pio2f.zig rename to lib/std/special/compiler_rt/rem_pio2f.zig index 5867fb30d9af..34397dd73477 100644 --- a/lib/std/math/__rem_pio2f.zig +++ b/lib/std/special/compiler_rt/rem_pio2f.zig @@ -3,8 +3,8 @@ // // https://git.musl-libc.org/cgit/musl/tree/src/math/__rem_pio2f.c -const std = @import("../std.zig"); -const __rem_pio2_large = @import("__rem_pio2_large.zig").__rem_pio2_large; +const std = @import("std"); +const rem_pio2_large = @import("rem_pio2_large.zig").rem_pio2_large; const math = std.math; const toint = 1.5 / math.floatEps(f64); @@ -19,8 +19,8 @@ const pio2_1t = 1.58932547735281966916e-08; // 0x3E5110b4, 0x611A6263 // Returns the remainder of x rem pi/2 in *y // use double precision for everything except passing x -// use __rem_pio2_large() for large x -pub fn __rem_pio2f(x: f32, y: *f64) i32 { +// use rem_pio2_large() for large x +pub fn rem_pio2f(x: f32, y: *f64) i32 { var tx: [1]f64 = undefined; var ty: [1]f64 = undefined; var @"fn": f64 = undefined; @@ -60,7 +60,7 @@ pub fn __rem_pio2f(x: f32, y: *f64) i32 { e0 = (ix >> 23) - (0x7f + 23); // e0 = ilogb(|x|)-23, positive ui = ix - (e0 << 23); tx[0] = @bitCast(f32, ui); - n = __rem_pio2_large(&tx, &ty, @intCast(i32, e0), 1, 0); + n = rem_pio2_large(&tx, &ty, @intCast(i32, e0), 1, 0); if (sign) { y.* = -ty[0]; return -n; diff --git a/lib/std/special/compiler_rt/round.zig b/lib/std/special/compiler_rt/round.zig new file mode 100644 index 000000000000..99201efcf89a --- /dev/null +++ b/lib/std/special/compiler_rt/round.zig @@ -0,0 +1,169 @@ +// Ported from musl, which is licensed under the MIT license: +// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT +// +// https://git.musl-libc.org/cgit/musl/tree/src/math/roundf.c +// https://git.musl-libc.org/cgit/musl/tree/src/math/round.c + +const std = @import("std"); +const math = std.math; +const expect = std.testing.expect; + +pub fn __roundh(x: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, roundf(x)); +} + +pub fn roundf(x_: f32) callconv(.C) f32 { + const f32_toint = 1.0 / math.floatEps(f32); + + var x = x_; + const u = @bitCast(u32, x); + const e = (u >> 23) & 0xFF; + var y: f32 = undefined; + + if (e >= 0x7F + 23) { + return x; + } + if (u >> 31 != 0) { + x = -x; + } + if (e < 0x7F - 1) { + math.doNotOptimizeAway(x + f32_toint); + return 0 * @bitCast(f32, u); + } + + y = x + f32_toint - f32_toint - x; + if (y > 0.5) { + y = y + x - 1; + } else if (y <= -0.5) { + y = y + x + 1; + } else { + y = y + x; + } + + if (u >> 31 != 0) { + return -y; + } else { + return y; + } +} + +pub fn round(x_: f64) callconv(.C) f64 { + const f64_toint = 1.0 / math.floatEps(f64); + + var x = x_; + const u = @bitCast(u64, x); + const e = (u >> 52) & 0x7FF; + var y: f64 = undefined; + + if (e >= 0x3FF + 52) { + return x; + } + if (u >> 63 != 0) { + x = -x; + 
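+        // From here on we round |x|; the final (u >> 63) check restores the
+        // sign, so halfway cases round away from zero in both directions.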
} + if (e < 0x3ff - 1) { + math.doNotOptimizeAway(x + f64_toint); + return 0 * @bitCast(f64, u); + } + + y = x + f64_toint - f64_toint - x; + if (y > 0.5) { + y = y + x - 1; + } else if (y <= -0.5) { + y = y + x + 1; + } else { + y = y + x; + } + + if (u >> 63 != 0) { + return -y; + } else { + return y; + } +} + +pub fn __roundx(x: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, roundq(x)); +} + +pub fn roundq(x_: f128) callconv(.C) f128 { + const f128_toint = 1.0 / math.floatEps(f128); + + var x = x_; + const u = @bitCast(u128, x); + const e = (u >> 112) & 0x7FFF; + var y: f128 = undefined; + + if (e >= 0x3FFF + 112) { + return x; + } + if (u >> 127 != 0) { + x = -x; + } + if (e < 0x3FFF - 1) { + math.doNotOptimizeAway(x + f128_toint); + return 0 * @bitCast(f128, u); + } + + y = x + f128_toint - f128_toint - x; + if (y > 0.5) { + y = y + x - 1; + } else if (y <= -0.5) { + y = y + x + 1; + } else { + y = y + x; + } + + if (u >> 127 != 0) { + return -y; + } else { + return y; + } +} + +test "round32" { + try expect(roundf(1.3) == 1.0); + try expect(roundf(-1.3) == -1.0); + try expect(roundf(0.2) == 0.0); + try expect(roundf(1.8) == 2.0); +} + +test "round64" { + try expect(round(1.3) == 1.0); + try expect(round(-1.3) == -1.0); + try expect(round(0.2) == 0.0); + try expect(round(1.8) == 2.0); +} + +test "round128" { + try expect(roundq(1.3) == 1.0); + try expect(roundq(-1.3) == -1.0); + try expect(roundq(0.2) == 0.0); + try expect(roundq(1.8) == 2.0); +} + +test "round32.special" { + try expect(roundf(0.0) == 0.0); + try expect(roundf(-0.0) == -0.0); + try expect(math.isPositiveInf(roundf(math.inf(f32)))); + try expect(math.isNegativeInf(roundf(-math.inf(f32)))); + try expect(math.isNan(roundf(math.nan(f32)))); +} + +test "round64.special" { + try expect(round(0.0) == 0.0); + try expect(round(-0.0) == -0.0); + try expect(math.isPositiveInf(round(math.inf(f64)))); + try expect(math.isNegativeInf(round(-math.inf(f64)))); + try expect(math.isNan(round(math.nan(f64)))); +} + +test "round128.special" { + try expect(roundq(0.0) == 0.0); + try expect(roundq(-0.0) == -0.0); + try expect(math.isPositiveInf(roundq(math.inf(f128)))); + try expect(math.isNegativeInf(roundq(-math.inf(f128)))); + try expect(math.isNan(roundq(math.nan(f128)))); +} diff --git a/lib/std/special/compiler_rt/sin.zig b/lib/std/special/compiler_rt/sin.zig new file mode 100644 index 000000000000..3d5572a59f82 --- /dev/null +++ b/lib/std/special/compiler_rt/sin.zig @@ -0,0 +1,162 @@ +// Ported from musl, which is licensed under the MIT license: +// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT +// +// https://git.musl-libc.org/cgit/musl/tree/src/math/sinf.c +// https://git.musl-libc.org/cgit/musl/tree/src/math/sin.c + +const std = @import("std"); +const math = std.math; +const expect = std.testing.expect; + +const trig = @import("trig.zig"); +const rem_pio2 = @import("rem_pio2.zig").rem_pio2; +const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f; + +pub fn __sinh(x: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, sinf(x)); +} + +pub fn sinf(x: f32) callconv(.C) f32 { + // Small multiples of pi/2 rounded to double precision. 
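+    // For |x| up to ~9*pi/4 the reduction below is a single subtraction of
+    // k*pi/2 held in double precision; that constant is accurate to about
+    // 2^-53, far below one ulp of the f32 result, so no low-order tail word
+    // is needed before handing off to the f64 kernels in trig.zig.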
+ const s1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18 + const s2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18 + const s3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2 + const s4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18 + + var ix = @bitCast(u32, x); + const sign = ix >> 31 != 0; + ix &= 0x7fffffff; + + if (ix <= 0x3f490fda) { // |x| ~<= pi/4 + if (ix < 0x39800000) { // |x| < 2**-12 + // raise inexact if x!=0 and underflow if subnormal + math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120); + return x; + } + return trig.__sindf(x); + } + if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4 + if (ix <= 0x4016cbe3) { // |x| ~<= 3pi/4 + if (sign) { + return -trig.__cosdf(x + s1pio2); + } else { + return trig.__cosdf(x - s1pio2); + } + } + return trig.__sindf(if (sign) -(x + s2pio2) else -(x - s2pio2)); + } + if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4 + if (ix <= 0x40afeddf) { // |x| ~<= 7*pi/4 + if (sign) { + return trig.__cosdf(x + s3pio2); + } else { + return -trig.__cosdf(x - s3pio2); + } + } + return trig.__sindf(if (sign) x + s4pio2 else x - s4pio2); + } + + // sin(Inf or NaN) is NaN + if (ix >= 0x7f800000) { + return x - x; + } + + var y: f64 = undefined; + const n = rem_pio2f(x, &y); + return switch (n & 3) { + 0 => trig.__sindf(y), + 1 => trig.__cosdf(y), + 2 => trig.__sindf(-y), + else => -trig.__cosdf(y), + }; +} + +pub fn sin(x: f64) callconv(.C) f64 { + var ix = @bitCast(u64, x) >> 32; + ix &= 0x7fffffff; + + // |x| ~< pi/4 + if (ix <= 0x3fe921fb) { + if (ix < 0x3e500000) { // |x| < 2**-26 + // raise inexact if x != 0 and underflow if subnormal + math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120); + return x; + } + return trig.__sin(x, 0.0, 0); + } + + // sin(Inf or NaN) is NaN + if (ix >= 0x7ff00000) { + return x - x; + } + + var y: [2]f64 = undefined; + const n = rem_pio2(x, &y); + return switch (n & 3) { + 0 => trig.__sin(y[0], y[1], 1), + 1 => trig.__cos(y[0], y[1]), + 2 => -trig.__sin(y[0], y[1], 1), + else => -trig.__cos(y[0], y[1]), + }; +} + +pub fn __sinx(x: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, sinq(x)); +} + +pub fn sinq(x: f128) callconv(.C) f128 { + // TODO: more correct implementation + return sin(@floatCast(f64, x)); +} + +test "sin32" { + const epsilon = 0.00001; + + try expect(math.approxEqAbs(f32, sinf(0.0), 0.0, epsilon)); + try expect(math.approxEqAbs(f32, sinf(0.2), 0.198669, epsilon)); + try expect(math.approxEqAbs(f32, sinf(0.8923), 0.778517, epsilon)); + try expect(math.approxEqAbs(f32, sinf(1.5), 0.997495, epsilon)); + try expect(math.approxEqAbs(f32, sinf(-1.5), -0.997495, epsilon)); + try expect(math.approxEqAbs(f32, sinf(37.45), -0.246544, epsilon)); + try expect(math.approxEqAbs(f32, sinf(89.123), 0.916166, epsilon)); +} + +test "sin64" { + const epsilon = 0.000001; + + try expect(math.approxEqAbs(f64, sin(0.0), 0.0, epsilon)); + try expect(math.approxEqAbs(f64, sin(0.2), 0.198669, epsilon)); + try expect(math.approxEqAbs(f64, sin(0.8923), 0.778517, epsilon)); + try expect(math.approxEqAbs(f64, sin(1.5), 0.997495, epsilon)); + try expect(math.approxEqAbs(f64, sin(-1.5), -0.997495, epsilon)); + try expect(math.approxEqAbs(f64, sin(37.45), -0.246543, epsilon)); + try expect(math.approxEqAbs(f64, sin(89.123), 0.916166, epsilon)); +} + +test "sin32.special" { + try expect(sinf(0.0) == 0.0); + try expect(sinf(-0.0) == -0.0); + try expect(math.isNan(sinf(math.inf(f32)))); + try expect(math.isNan(sinf(-math.inf(f32)))); + 
try expect(math.isNan(sinf(math.nan(f32)))); +} + +test "sin64.special" { + try expect(sin(0.0) == 0.0); + try expect(sin(-0.0) == -0.0); + try expect(math.isNan(sin(math.inf(f64)))); + try expect(math.isNan(sin(-math.inf(f64)))); + try expect(math.isNan(sin(math.nan(f64)))); +} + +test "sin32 #9901" { + const float = @bitCast(f32, @as(u32, 0b11100011111111110000000000000000)); + _ = sinf(float); +} + +test "sin64 #9901" { + const float = @bitCast(f64, @as(u64, 0b1111111101000001000000001111110111111111100000000000000000000001)); + _ = sin(float); +} diff --git a/lib/std/special/compiler_rt/sincos.zig b/lib/std/special/compiler_rt/sincos.zig new file mode 100644 index 000000000000..31ebd0d1d0a8 --- /dev/null +++ b/lib/std/special/compiler_rt/sincos.zig @@ -0,0 +1,242 @@ +const std = @import("std"); +const math = std.math; +const sin = @import("sin.zig"); +const cos = @import("cos.zig"); +const trig = @import("trig.zig"); +const rem_pio2 = @import("rem_pio2.zig").rem_pio2; +const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f; + +pub fn __sincosh(x: f16, r_sin: *f16, r_cos: *f16) callconv(.C) void { + // TODO: more efficient implementation + var big_sin: f32 = undefined; + var big_cos: f32 = undefined; + sincosf(x, &big_sin, &big_cos); + r_sin.* = @floatCast(f16, big_sin); + r_cos.* = @floatCast(f16, big_cos); +} + +pub fn sincosf(x: f32, r_sin: *f32, r_cos: *f32) callconv(.C) void { + const sc1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18 + const sc2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18 + const sc3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2 + const sc4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18 + + const pre_ix = @bitCast(u32, x); + const sign = pre_ix >> 31 != 0; + const ix = pre_ix & 0x7fffffff; + + // |x| ~<= pi/4 + if (ix <= 0x3f490fda) { + // |x| < 2**-12 + if (ix < 0x39800000) { + // raise inexact if x!=0 and underflow if subnormal + math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120); + r_sin.* = x; + r_cos.* = 1.0; + return; + } + r_sin.* = trig.__sindf(x); + r_cos.* = trig.__cosdf(x); + return; + } + + // |x| ~<= 5*pi/4 + if (ix <= 0x407b53d1) { + // |x| ~<= 3pi/4 + if (ix <= 0x4016cbe3) { + if (sign) { + r_sin.* = -trig.__cosdf(x + sc1pio2); + r_cos.* = trig.__sindf(x + sc1pio2); + } else { + r_sin.* = trig.__cosdf(sc1pio2 - x); + r_cos.* = trig.__sindf(sc1pio2 - x); + } + return; + } + // -sin(x+c) is not correct if x+c could be 0: -0 vs +0 + r_sin.* = -trig.__sindf(if (sign) x + sc2pio2 else x - sc2pio2); + r_cos.* = -trig.__cosdf(if (sign) x + sc2pio2 else x - sc2pio2); + return; + } + + // |x| ~<= 9*pi/4 + if (ix <= 0x40e231d5) { + // |x| ~<= 7*pi/4 + if (ix <= 0x40afeddf) { + if (sign) { + r_sin.* = trig.__cosdf(x + sc3pio2); + r_cos.* = -trig.__sindf(x + sc3pio2); + } else { + r_sin.* = -trig.__cosdf(x - sc3pio2); + r_cos.* = trig.__sindf(x - sc3pio2); + } + return; + } + r_sin.* = trig.__sindf(if (sign) x + sc4pio2 else x - sc4pio2); + r_cos.* = trig.__cosdf(if (sign) x + sc4pio2 else x - sc4pio2); + return; + } + + // sin(Inf or NaN) is NaN + if (ix >= 0x7f800000) { + const result = x - x; + r_sin.* = result; + r_cos.* = result; + return; + } + + // general argument reduction needed + var y: f64 = undefined; + const n = rem_pio2f(x, &y); + const s = trig.__sindf(y); + const c = trig.__cosdf(y); + switch (n & 3) { + 0 => { + r_sin.* = s; + r_cos.* = c; + }, + 1 => { + r_sin.* = c; + r_cos.* = -s; + }, + 2 => { + r_sin.* = -s; + r_cos.* = -c; + }, + else => { + r_sin.* = -c; + r_cos.* = s; 
+ }, + } +} + +pub fn sincos(x: f64, r_sin: *f64, r_cos: *f64) callconv(.C) void { + const ix = @truncate(u32, @bitCast(u64, x) >> 32) & 0x7fffffff; + + // |x| ~< pi/4 + if (ix <= 0x3fe921fb) { + // if |x| < 2**-27 * sqrt(2) + if (ix < 0x3e46a09e) { + // raise inexact if x != 0 and underflow if subnormal + math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120); + r_sin.* = x; + r_cos.* = 1.0; + return; + } + r_sin.* = trig.__sin(x, 0.0, 0); + r_cos.* = trig.__cos(x, 0.0); + return; + } + + // sincos(Inf or NaN) is NaN + if (ix >= 0x7ff00000) { + const result = x - x; + r_sin.* = result; + r_cos.* = result; + return; + } + + // argument reduction needed + var y: [2]f64 = undefined; + const n = rem_pio2(x, &y); + const s = trig.__sin(y[0], y[1], 1); + const c = trig.__cos(y[0], y[1]); + switch (n & 3) { + 0 => { + r_sin.* = s; + r_cos.* = c; + }, + 1 => { + r_sin.* = c; + r_cos.* = -s; + }, + 2 => { + r_sin.* = -s; + r_cos.* = -c; + }, + else => { + r_sin.* = -c; + r_cos.* = s; + }, + } +} + +pub fn __sincosx(x: f80, r_sin: *f80, r_cos: *f80) callconv(.C) void { + // TODO: more efficient implementation + //return sincos_generic(f80, x, r_sin, r_cos); + var big_sin: f128 = undefined; + var big_cos: f128 = undefined; + sincosq(x, &big_sin, &big_cos); + r_sin.* = @floatCast(f80, big_sin); + r_cos.* = @floatCast(f80, big_cos); +} + +pub fn sincosq(x: f128, r_sin: *f128, r_cos: *f128) callconv(.C) void { + // TODO: more correct implementation + //return sincos_generic(f128, x, r_sin, r_cos); + var small_sin: f64 = undefined; + var small_cos: f64 = undefined; + sincos(@floatCast(f64, x), &small_sin, &small_cos); + r_sin.* = small_sin; + r_cos.* = small_cos; +} + +const rem_pio2_generic = @compileError("TODO"); + +/// Ported from musl sincosl.c. 
Needs the following dependencies to be complete: +/// * rem_pio2_generic ported from __rem_pio2l.c +/// * trig.sin_generic ported from __sinl.c +/// * trig.cos_generic ported from __cosl.c +inline fn sincos_generic(comptime F: type, x: F, r_sin: *F, r_cos: *F) void { + const sc1pio4: F = 1.0 * math.pi / 4.0; + const bits = @typeInfo(F).Float.bits; + const I = std.meta.Int(.unsigned, bits); + const ix = @bitCast(I, x) & (math.maxInt(I) >> 1); + const se = @truncate(u16, ix >> (bits - 16)); + + if (se == 0x7fff) { + const result = x - x; + r_sin.* = result; + r_cos.* = result; + return; + } + + if (@bitCast(F, ix) < sc1pio4) { + if (se < 0x3fff - math.floatFractionalBits(F) - 1) { + // raise underflow if subnormal + if (se == 0) { + math.doNotOptimizeAway(x * 0x1p-120); + } + r_sin.* = x; + // raise inexact if x!=0 + r_cos.* = 1.0 + x; + return; + } + r_sin.* = trig.sin_generic(F, x, 0, 0); + r_cos.* = trig.cos_generic(F, x, 0); + return; + } + + var y: [2]F = undefined; + const n = rem_pio2_generic(F, x, &y); + const s = trig.sin_generic(F, y[0], y[1], 1); + const c = trig.cos_generic(F, y[0], y[1]); + switch (n & 3) { + 0 => { + r_sin.* = s; + r_cos.* = c; + }, + 1 => { + r_sin.* = c; + r_cos.* = -s; + }, + 2 => { + r_sin.* = -s; + r_cos.* = -c; + }, + else => { + r_sin.* = -c; + r_cos.* = s; + }, + } +} diff --git a/lib/std/special/compiler_rt/sqrt.zig b/lib/std/special/compiler_rt/sqrt.zig new file mode 100644 index 000000000000..ba07beb86efa --- /dev/null +++ b/lib/std/special/compiler_rt/sqrt.zig @@ -0,0 +1,284 @@ +const std = @import("std"); +const math = std.math; + +pub fn __sqrth(x: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, sqrtf(x)); +} + +pub fn sqrtf(x: f32) callconv(.C) f32 { + const tiny: f32 = 1.0e-30; + const sign: i32 = @bitCast(i32, @as(u32, 0x80000000)); + var ix: i32 = @bitCast(i32, x); + + if ((ix & 0x7F800000) == 0x7F800000) { + return x * x + x; // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = snan + } + + // zero + if (ix <= 0) { + if (ix & ~sign == 0) { + return x; // sqrt (+-0) = +-0 + } + if (ix < 0) { + return math.snan(f32); + } + } + + // normalize + var m = ix >> 23; + if (m == 0) { + // subnormal + var i: i32 = 0; + while (ix & 0x00800000 == 0) : (i += 1) { + ix <<= 1; + } + m -= i - 1; + } + + m -= 127; // unbias exponent + ix = (ix & 0x007FFFFF) | 0x00800000; + + if (m & 1 != 0) { // odd m, double x to even + ix += ix; + } + + m >>= 1; // m = [m / 2] + + // sqrt(x) bit by bit + ix += ix; + var q: i32 = 0; // q = sqrt(x) + var s: i32 = 0; + var r: i32 = 0x01000000; // r = moving bit right -> left + + while (r != 0) { + const t = s + r; + if (t <= ix) { + s = t + r; + ix -= t; + q += r; + } + ix += ix; + r >>= 1; + } + + // floating add to find rounding direction + if (ix != 0) { + var z = 1.0 - tiny; // inexact + if (z >= 1.0) { + z = 1.0 + tiny; + if (z > 1.0) { + q += 2; + } else { + if (q & 1 != 0) { + q += 1; + } + } + } + } + + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + return @bitCast(f32, ix); +} + +/// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound +/// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are +/// potentially some edge cases remaining that are not handled in the same way. 
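+///
+/// The algorithm itself is the classic bit-by-bit square root used by musl:
+/// the 53-bit mantissa is carried in the (ix0, ix1) pair of u32 halves, one
+/// result bit is produced per loop iteration, and a final inexact test picks
+/// the rounding direction.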
+pub fn sqrt(x: f64) callconv(.C) f64 { + const tiny: f64 = 1.0e-300; + const sign: u32 = 0x80000000; + const u = @bitCast(u64, x); + + var ix0 = @intCast(u32, u >> 32); + var ix1 = @intCast(u32, u & 0xFFFFFFFF); + + // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan + if (ix0 & 0x7FF00000 == 0x7FF00000) { + return x * x + x; + } + + // sqrt(+-0) = +-0 + if (x == 0.0) { + return x; + } + // sqrt(-ve) = snan + if (ix0 & sign != 0) { + return math.snan(f64); + } + + // normalize x + var m = @intCast(i32, ix0 >> 20); + if (m == 0) { + // subnormal + while (ix0 == 0) { + m -= 21; + ix0 |= ix1 >> 11; + ix1 <<= 21; + } + + // subnormal + var i: u32 = 0; + while (ix0 & 0x00100000 == 0) : (i += 1) { + ix0 <<= 1; + } + m -= @intCast(i32, i) - 1; + ix0 |= ix1 >> @intCast(u5, 32 - i); + ix1 <<= @intCast(u5, i); + } + + // unbias exponent + m -= 1023; + ix0 = (ix0 & 0x000FFFFF) | 0x00100000; + if (m & 1 != 0) { + ix0 += ix0 + (ix1 >> 31); + ix1 = ix1 +% ix1; + } + m >>= 1; + + // sqrt(x) bit by bit + ix0 += ix0 + (ix1 >> 31); + ix1 = ix1 +% ix1; + + var q: u32 = 0; + var q1: u32 = 0; + var s0: u32 = 0; + var s1: u32 = 0; + var r: u32 = 0x00200000; + var t: u32 = undefined; + var t1: u32 = undefined; + + while (r != 0) { + t = s0 +% r; + if (t <= ix0) { + s0 = t + r; + ix0 -= t; + q += r; + } + ix0 = ix0 +% ix0 +% (ix1 >> 31); + ix1 = ix1 +% ix1; + r >>= 1; + } + + r = sign; + while (r != 0) { + t1 = s1 +% r; + t = s0; + if (t < ix0 or (t == ix0 and t1 <= ix1)) { + s1 = t1 +% r; + if (t1 & sign == sign and s1 & sign == 0) { + s0 += 1; + } + ix0 -= t; + if (ix1 < t1) { + ix0 -= 1; + } + ix1 = ix1 -% t1; + q1 += r; + } + ix0 = ix0 +% ix0 +% (ix1 >> 31); + ix1 = ix1 +% ix1; + r >>= 1; + } + + // rounding direction + if (ix0 | ix1 != 0) { + var z = 1.0 - tiny; // raise inexact + if (z >= 1.0) { + z = 1.0 + tiny; + if (q1 == 0xFFFFFFFF) { + q1 = 0; + q += 1; + } else if (z > 1.0) { + if (q1 == 0xFFFFFFFE) { + q += 1; + } + q1 += 2; + } else { + q1 += q1 & 1; + } + } + } + + ix0 = (q >> 1) + 0x3FE00000; + ix1 = q1 >> 1; + if (q & 1 != 0) { + ix1 |= 0x80000000; + } + + // NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same + // behaviour at least. + var iix0 = @intCast(i32, ix0); + iix0 = iix0 +% (m << 20); + + const uz = (@intCast(u64, iix0) << 32) | ix1; + return @bitCast(f64, uz); +} + +pub fn __sqrtx(x: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, sqrtq(x)); +} + +pub fn sqrtq(x: f128) callconv(.C) f128 { + // TODO: more correct implementation + return sqrt(@floatCast(f64, x)); +} + +test "sqrtf" { + const V = [_]f32{ + 0.0, + 4.089288054930154, + 7.538757127071935, + 8.97780793672623, + 5.304443821913729, + 5.682408965311888, + 0.5846878579110049, + 3.650338664297043, + 0.3178091951800732, + 7.1505232436382835, + 3.6589165881946464, + }; + + // Note that @sqrt will either generate the sqrt opcode (if supported by the + // target ISA) or a call to `sqrtf` otherwise. 
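+    // On a target without that opcode the comparison is self-referential, so
+    // these fixed inputs mostly guard against crashes and gross regressions.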
+ for (V) |val| + try std.testing.expectEqual(@sqrt(val), sqrtf(val)); +} + +test "sqrtf special" { + try std.testing.expect(math.isPositiveInf(sqrtf(math.inf(f32)))); + try std.testing.expect(sqrtf(0.0) == 0.0); + try std.testing.expect(sqrtf(-0.0) == -0.0); + try std.testing.expect(math.isNan(sqrtf(-1.0))); + try std.testing.expect(math.isNan(sqrtf(math.nan(f32)))); +} + +test "sqrt" { + const V = [_]f64{ + 0.0, + 4.089288054930154, + 7.538757127071935, + 8.97780793672623, + 5.304443821913729, + 5.682408965311888, + 0.5846878579110049, + 3.650338664297043, + 0.3178091951800732, + 7.1505232436382835, + 3.6589165881946464, + }; + + // Note that @sqrt will either generate the sqrt opcode (if supported by the + // target ISA) or a call to `sqrtf` otherwise. + for (V) |val| + try std.testing.expectEqual(@sqrt(val), sqrt(val)); +} + +test "sqrt special" { + try std.testing.expect(math.isPositiveInf(sqrt(math.inf(f64)))); + try std.testing.expect(sqrt(0.0) == 0.0); + try std.testing.expect(sqrt(-0.0) == -0.0); + try std.testing.expect(math.isNan(sqrt(-1.0))); + try std.testing.expect(math.isNan(sqrt(math.nan(f64)))); +} diff --git a/lib/std/math/tan.zig b/lib/std/special/compiler_rt/tan.zig similarity index 53% rename from lib/std/math/tan.zig rename to lib/std/special/compiler_rt/tan.zig index fd5950df7c25..d99f00b99e87 100644 --- a/lib/std/math/tan.zig +++ b/lib/std/special/compiler_rt/tan.zig @@ -5,30 +5,20 @@ // https://git.musl-libc.org/cgit/musl/tree/src/math/tan.c // https://golang.org/src/math/tan.go -const std = @import("../std.zig"); +const std = @import("std"); const math = std.math; const expect = std.testing.expect; -const kernel = @import("__trig.zig"); -const __rem_pio2 = @import("__rem_pio2.zig").__rem_pio2; -const __rem_pio2f = @import("__rem_pio2f.zig").__rem_pio2f; - -/// Returns the tangent of the radian value x. -/// -/// Special Cases: -/// - tan(+-0) = +-0 -/// - tan(+-inf) = nan -/// - tan(nan) = nan -pub fn tan(x: anytype) @TypeOf(x) { - const T = @TypeOf(x); - return switch (T) { - f32 => tan32(x), - f64 => tan64(x), - else => @compileError("tan not implemented for " ++ @typeName(T)), - }; +const kernel = @import("trig.zig"); +const rem_pio2 = @import("rem_pio2.zig").rem_pio2; +const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f; + +pub fn __tanh(x: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, tanf(x)); } -fn tan32(x: f32) f32 { +pub fn tanf(x: f32) callconv(.C) f32 { // Small multiples of pi/2 rounded to double precision. 
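+    // tan has period pi, so after reduction only the parity of the quadrant
+    // count matters; __tandf receives it directly as n & 1 below.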
const t1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18 const t2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18 @@ -68,11 +58,11 @@ fn tan32(x: f32) f32 { } var y: f64 = undefined; - const n = __rem_pio2f(x, &y); + const n = rem_pio2f(x, &y); return kernel.__tandf(y, n & 1 != 0); } -fn tan64(x: f64) f64 { +pub fn tan(x: f64) callconv(.C) f64 { var ix = @bitCast(u64, x) >> 32; ix &= 0x7fffffff; @@ -92,49 +82,59 @@ fn tan64(x: f64) f64 { } var y: [2]f64 = undefined; - const n = __rem_pio2(x, &y); + const n = rem_pio2(x, &y); return kernel.__tan(y[0], y[1], n & 1 != 0); } -test "math.tan" { - try expect(tan(@as(f32, 0.0)) == tan32(0.0)); - try expect(tan(@as(f64, 0.0)) == tan64(0.0)); +pub fn __tanx(x: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, tanq(x)); +} + +pub fn tanq(x: f128) callconv(.C) f128 { + // TODO: more correct implementation + return tan(@floatCast(f64, x)); +} + +test "tan" { + try expect(tan(@as(f32, 0.0)) == tanf(0.0)); + try expect(tan(@as(f64, 0.0)) == tan(0.0)); } -test "math.tan32" { +test "tan32" { const epsilon = 0.00001; - try expect(math.approxEqAbs(f32, tan32(0.0), 0.0, epsilon)); - try expect(math.approxEqAbs(f32, tan32(0.2), 0.202710, epsilon)); - try expect(math.approxEqAbs(f32, tan32(0.8923), 1.240422, epsilon)); - try expect(math.approxEqAbs(f32, tan32(1.5), 14.101420, epsilon)); - try expect(math.approxEqAbs(f32, tan32(37.45), -0.254397, epsilon)); - try expect(math.approxEqAbs(f32, tan32(89.123), 2.285852, epsilon)); + try expect(math.approxEqAbs(f32, tanf(0.0), 0.0, epsilon)); + try expect(math.approxEqAbs(f32, tanf(0.2), 0.202710, epsilon)); + try expect(math.approxEqAbs(f32, tanf(0.8923), 1.240422, epsilon)); + try expect(math.approxEqAbs(f32, tanf(1.5), 14.101420, epsilon)); + try expect(math.approxEqAbs(f32, tanf(37.45), -0.254397, epsilon)); + try expect(math.approxEqAbs(f32, tanf(89.123), 2.285852, epsilon)); } -test "math.tan64" { +test "tan64" { const epsilon = 0.000001; - try expect(math.approxEqAbs(f64, tan64(0.0), 0.0, epsilon)); - try expect(math.approxEqAbs(f64, tan64(0.2), 0.202710, epsilon)); - try expect(math.approxEqAbs(f64, tan64(0.8923), 1.240422, epsilon)); - try expect(math.approxEqAbs(f64, tan64(1.5), 14.101420, epsilon)); - try expect(math.approxEqAbs(f64, tan64(37.45), -0.254397, epsilon)); - try expect(math.approxEqAbs(f64, tan64(89.123), 2.2858376, epsilon)); + try expect(math.approxEqAbs(f64, tan(0.0), 0.0, epsilon)); + try expect(math.approxEqAbs(f64, tan(0.2), 0.202710, epsilon)); + try expect(math.approxEqAbs(f64, tan(0.8923), 1.240422, epsilon)); + try expect(math.approxEqAbs(f64, tan(1.5), 14.101420, epsilon)); + try expect(math.approxEqAbs(f64, tan(37.45), -0.254397, epsilon)); + try expect(math.approxEqAbs(f64, tan(89.123), 2.2858376, epsilon)); } -test "math.tan32.special" { - try expect(tan32(0.0) == 0.0); - try expect(tan32(-0.0) == -0.0); - try expect(math.isNan(tan32(math.inf(f32)))); - try expect(math.isNan(tan32(-math.inf(f32)))); - try expect(math.isNan(tan32(math.nan(f32)))); +test "tan32.special" { + try expect(tanf(0.0) == 0.0); + try expect(tanf(-0.0) == -0.0); + try expect(math.isNan(tanf(math.inf(f32)))); + try expect(math.isNan(tanf(-math.inf(f32)))); + try expect(math.isNan(tanf(math.nan(f32)))); } -test "math.tan64.special" { - try expect(tan64(0.0) == 0.0); - try expect(tan64(-0.0) == -0.0); - try expect(math.isNan(tan64(math.inf(f64)))); - try expect(math.isNan(tan64(-math.inf(f64)))); - try expect(math.isNan(tan64(math.nan(f64)))); +test 
"tan64.special" { + try expect(tan(0.0) == 0.0); + try expect(tan(-0.0) == -0.0); + try expect(math.isNan(tan(math.inf(f64)))); + try expect(math.isNan(tan(-math.inf(f64)))); + try expect(math.isNan(tan(math.nan(f64)))); } diff --git a/lib/std/math/__trig.zig b/lib/std/special/compiler_rt/trig.zig similarity index 60% rename from lib/std/math/__trig.zig rename to lib/std/special/compiler_rt/trig.zig index 0c08ed58bde1..8ece83515e21 100644 --- a/lib/std/math/__trig.zig +++ b/lib/std/special/compiler_rt/trig.zig @@ -8,41 +8,41 @@ // https://git.musl-libc.org/cgit/musl/tree/src/math/__tand.c // https://git.musl-libc.org/cgit/musl/tree/src/math/__tandf.c -// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 -// Input x is assumed to be bounded by ~pi/4 in magnitude. -// Input y is the tail of x. -// -// Algorithm -// 1. Since cos(-x) = cos(x), we need only to consider positive x. -// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. -// 3. cos(x) is approximated by a polynomial of degree 14 on -// [0,pi/4] -// 4 14 -// cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x -// where the remez error is -// -// | 2 4 6 8 10 12 14 | -58 -// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 -// | | -// -// 4 6 8 10 12 14 -// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then -// cos(x) ~ 1 - x*x/2 + r -// since cos(x+y) ~ cos(x) - sin(x)*y -// ~ cos(x) - x*y, -// a correction term is necessary in cos(x) and hence -// cos(x+y) = 1 - (x*x/2 - (r - x*y)) -// For better accuracy, rearrange to -// cos(x+y) ~ w + (tmp + (r-x*y)) -// where w = 1 - x*x/2 and tmp is a tiny correction term -// (1 - x*x/2 == w + tmp exactly in infinite precision). -// The exactness of w + tmp in infinite precision depends on w -// and tmp having the same precision as x. If they have extra -// precision due to compiler bugs, then the extra precision is -// only good provided it is retained in all terms of the final -// expression for cos(). Retention happens in all cases tested -// under FreeBSD, so don't pessimize things by forcibly clipping -// any extra precision in w. +/// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 +/// Input x is assumed to be bounded by ~pi/4 in magnitude. +/// Input y is the tail of x. +/// +/// Algorithm +/// 1. Since cos(-x) = cos(x), we need only to consider positive x. +/// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. +/// 3. cos(x) is approximated by a polynomial of degree 14 on +/// [0,pi/4] +/// 4 14 +/// cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x +/// where the remez error is +/// +/// | 2 4 6 8 10 12 14 | -58 +/// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 +/// | | +/// +/// 4 6 8 10 12 14 +/// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then +/// cos(x) ~ 1 - x*x/2 + r +/// since cos(x+y) ~ cos(x) - sin(x)*y +/// ~ cos(x) - x*y, +/// a correction term is necessary in cos(x) and hence +/// cos(x+y) = 1 - (x*x/2 - (r - x*y)) +/// For better accuracy, rearrange to +/// cos(x+y) ~ w + (tmp + (r-x*y)) +/// where w = 1 - x*x/2 and tmp is a tiny correction term +/// (1 - x*x/2 == w + tmp exactly in infinite precision). +/// The exactness of w + tmp in infinite precision depends on w +/// and tmp having the same precision as x. If they have extra +/// precision due to compiler bugs, then the extra precision is +/// only good provided it is retained in all terms of the final +/// expression for cos(). 
Retention happens in all cases tested +/// under FreeBSD, so don't pessimize things by forcibly clipping +/// any extra precision in w. pub fn __cos(x: f64, y: f64) f64 { const C1 = 4.16666666666666019037e-02; // 0x3FA55555, 0x5555554C const C2 = -1.38888888888741095749e-03; // 0xBF56C16C, 0x16C15177 @@ -73,33 +73,33 @@ pub fn __cosdf(x: f64) f32 { return @floatCast(f32, ((1.0 + z * C0) + w * C1) + (w * z) * r); } -// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 -// Input x is assumed to be bounded by ~pi/4 in magnitude. -// Input y is the tail of x. -// Input iy indicates whether y is 0. (if iy=0, y assume to be 0). -// -// Algorithm -// 1. Since sin(-x) = -sin(x), we need only to consider positive x. -// 2. Callers must return sin(-0) = -0 without calling here since our -// odd polynomial is not evaluated in a way that preserves -0. -// Callers may do the optimization sin(x) ~ x for tiny x. -// 3. sin(x) is approximated by a polynomial of degree 13 on -// [0,pi/4] -// 3 13 -// sin(x) ~ x + S1*x + ... + S6*x -// where -// -// |sin(x) 2 4 6 8 10 12 | -58 -// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 -// | x | -// -// 4. sin(x+y) = sin(x) + sin'(x')*y -// ~ sin(x) + (1-x*x/2)*y -// For better accuracy, let -// 3 2 2 2 2 -// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) -// then 3 2 -// sin(x) = x + (S1*x + (x *(r-y/2)+y)) +/// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 +/// Input x is assumed to be bounded by ~pi/4 in magnitude. +/// Input y is the tail of x. +/// Input iy indicates whether y is 0. (if iy=0, y assume to be 0). +/// +/// Algorithm +/// 1. Since sin(-x) = -sin(x), we need only to consider positive x. +/// 2. Callers must return sin(-0) = -0 without calling here since our +/// odd polynomial is not evaluated in a way that preserves -0. +/// Callers may do the optimization sin(x) ~ x for tiny x. +/// 3. sin(x) is approximated by a polynomial of degree 13 on +/// [0,pi/4] +/// 3 13 +/// sin(x) ~ x + S1*x + ... + S6*x +/// where +/// +/// |sin(x) 2 4 6 8 10 12 | -58 +/// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 +/// | x | +/// +/// 4. sin(x+y) = sin(x) + sin'(x')*y +/// ~ sin(x) + (1-x*x/2)*y +/// For better accuracy, let +/// 3 2 2 2 2 +/// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) +/// then 3 2 +/// sin(x) = x + (S1*x + (x *(r-y/2)+y)) pub fn __sin(x: f64, y: f64, iy: i32) f64 { const S1 = -1.66666666666666324348e-01; // 0xBFC55555, 0x55555549 const S2 = 8.33333333332248946124e-03; // 0x3F811111, 0x1110F8A6 @@ -134,38 +134,38 @@ pub fn __sindf(x: f64) f32 { return @floatCast(f32, (x + s * (S1 + z * S2)) + s * w * r); } -// kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 -// Input x is assumed to be bounded by ~pi/4 in magnitude. -// Input y is the tail of x. -// Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned. -// -// Algorithm -// 1. Since tan(-x) = -tan(x), we need only to consider positive x. -// 2. Callers must return tan(-0) = -0 without calling here since our -// odd polynomial is not evaluated in a way that preserves -0. -// Callers may do the optimization tan(x) ~ x for tiny x. -// 3. tan(x) is approximated by a odd polynomial of degree 27 on -// [0,0.67434] -// 3 27 -// tan(x) ~ x + T1*x + ... + T13*x -// where -// -// |tan(x) 2 4 26 | -59.2 -// |----- - (1+T1*x +T2*x +.... 
+T13*x )| <= 2 -// | x | -// -// Note: tan(x+y) = tan(x) + tan'(x)*y -// ~ tan(x) + (1+x*x)*y -// Therefore, for better accuracy in computing tan(x+y), let -// 3 2 2 2 2 -// r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) -// then -// 3 2 -// tan(x+y) = x + (T1*x + (x *(r+y)+y)) -// -// 4. For x in [0.67434,pi/4], let y = pi/4 - x, then -// tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) -// = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) +/// kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 +/// Input x is assumed to be bounded by ~pi/4 in magnitude. +/// Input y is the tail of x. +/// Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned. +/// +/// Algorithm +/// 1. Since tan(-x) = -tan(x), we need only to consider positive x. +/// 2. Callers must return tan(-0) = -0 without calling here since our +/// odd polynomial is not evaluated in a way that preserves -0. +/// Callers may do the optimization tan(x) ~ x for tiny x. +/// 3. tan(x) is approximated by a odd polynomial of degree 27 on +/// [0,0.67434] +/// 3 27 +/// tan(x) ~ x + T1*x + ... + T13*x +/// where +/// +/// |tan(x) 2 4 26 | -59.2 +/// |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 +/// | x | +/// +/// Note: tan(x+y) = tan(x) + tan'(x)*y +/// ~ tan(x) + (1+x*x)*y +/// Therefore, for better accuracy in computing tan(x+y), let +/// 3 2 2 2 2 +/// r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) +/// then +/// 3 2 +/// tan(x+y) = x + (T1*x + (x *(r+y)+y)) +/// +/// 4. For x in [0.67434,pi/4], let y = pi/4 - x, then +/// tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) +/// = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) pub fn __tan(x_: f64, y_: f64, odd: bool) f64 { var x = x_; var y = y_; diff --git a/lib/std/special/compiler_rt/trunc.zig b/lib/std/special/compiler_rt/trunc.zig new file mode 100644 index 000000000000..5406f9a02d4d --- /dev/null +++ b/lib/std/special/compiler_rt/trunc.zig @@ -0,0 +1,124 @@ +// Ported from musl, which is licensed under the MIT license: +// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT +// +// https://git.musl-libc.org/cgit/musl/tree/src/math/truncf.c +// https://git.musl-libc.org/cgit/musl/tree/src/math/trunc.c + +const std = @import("std"); +const math = std.math; +const expect = std.testing.expect; + +pub fn __trunch(x: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, truncf(x)); +} + +pub fn truncf(x: f32) callconv(.C) f32 { + const u = @bitCast(u32, x); + var e = @intCast(i32, ((u >> 23) & 0xFF)) - 0x7F + 9; + var m: u32 = undefined; + + if (e >= 23 + 9) { + return x; + } + if (e < 9) { + e = 1; + } + + m = @as(u32, math.maxInt(u32)) >> @intCast(u5, e); + if (u & m == 0) { + return x; + } else { + math.doNotOptimizeAway(x + 0x1p120); + return @bitCast(f32, u & ~m); + } +} + +pub fn trunc(x: f64) callconv(.C) f64 { + const u = @bitCast(u64, x); + var e = @intCast(i32, ((u >> 52) & 0x7FF)) - 0x3FF + 12; + var m: u64 = undefined; + + if (e >= 52 + 12) { + return x; + } + if (e < 12) { + e = 1; + } + + m = @as(u64, math.maxInt(u64)) >> @intCast(u6, e); + if (u & m == 0) { + return x; + } else { + math.doNotOptimizeAway(x + 0x1p120); + return @bitCast(f64, u & ~m); + } +} + +pub fn __truncx(x: f80) callconv(.C) f80 { + // TODO: more efficient implementation + return @floatCast(f80, truncq(x)); +} + +pub fn truncq(x: f128) callconv(.C) f128 { + const u = @bitCast(u128, x); + var e = @intCast(i32, ((u >> 112) & 0x7FFF)) - 0x3FFF + 16; + var m: u128 = undefined; + + if (e >= 112 + 16) { + return x; + } + if (e < 16) { + e = 1; + } + + m = 
@as(u128, math.maxInt(u128)) >> @intCast(u7, e); + if (u & m == 0) { + return x; + } else { + math.doNotOptimizeAway(x + 0x1p120); + return @bitCast(f128, u & ~m); + } +} + +test "trunc32" { + try expect(truncf(1.3) == 1.0); + try expect(truncf(-1.3) == -1.0); + try expect(truncf(0.2) == 0.0); +} + +test "trunc64" { + try expect(trunc(1.3) == 1.0); + try expect(trunc(-1.3) == -1.0); + try expect(trunc(0.2) == 0.0); +} + +test "trunc128" { + try expect(truncq(1.3) == 1.0); + try expect(truncq(-1.3) == -1.0); + try expect(truncq(0.2) == 0.0); +} + +test "trunc32.special" { + try expect(truncf(0.0) == 0.0); // 0x3F800000 + try expect(truncf(-0.0) == -0.0); + try expect(math.isPositiveInf(truncf(math.inf(f32)))); + try expect(math.isNegativeInf(truncf(-math.inf(f32)))); + try expect(math.isNan(truncf(math.nan(f32)))); +} + +test "trunc64.special" { + try expect(trunc(0.0) == 0.0); + try expect(trunc(-0.0) == -0.0); + try expect(math.isPositiveInf(trunc(math.inf(f64)))); + try expect(math.isNegativeInf(trunc(-math.inf(f64)))); + try expect(math.isNan(trunc(math.nan(f64)))); +} + +test "trunc128.special" { + try expect(truncq(0.0) == 0.0); + try expect(truncq(-0.0) == -0.0); + try expect(math.isPositiveInf(truncq(math.inf(f128)))); + try expect(math.isNegativeInf(truncq(-math.inf(f128)))); + try expect(math.isNan(truncq(math.nan(f128)))); +} diff --git a/lib/std/testing.zig b/lib/std/testing.zig index 004e2d0fa7af..cfdf300c045d 100644 --- a/lib/std/testing.zig +++ b/lib/std/testing.zig @@ -265,7 +265,7 @@ pub fn expectApproxEqRel(expected: anytype, actual: @TypeOf(expected), tolerance test "expectApproxEqRel" { inline for ([_]type{ f16, f32, f64, f128 }) |T| { const eps_value = comptime math.epsilon(T); - const sqrt_eps_value = comptime math.sqrt(eps_value); + const sqrt_eps_value = comptime @sqrt(eps_value); const pos_x: T = 12.0; const pos_y: T = pos_x + 2 * eps_value; diff --git a/src/Air.zig b/src/Air.zig index d02491ff8906..0968d9518037 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -249,12 +249,15 @@ pub const Inst = struct { /// Square root of a floating point number. /// Uses the `un_op` field. sqrt, - /// Sine a floating point number. + /// Sine function on a floating point number. /// Uses the `un_op` field. sin, - /// Cosine a floating point number. + /// Cosine function on a floating point number. /// Uses the `un_op` field. cos, + /// Tangent function on a floating point number. + /// Uses the `un_op` field. + tan, /// Base e exponential of a floating point number. /// Uses the `un_op` field. 
exp, @@ -921,6 +924,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/AstGen.zig b/src/AstGen.zig index 34b29b28fb01..230b46a489f9 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -2237,7 +2237,6 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: Ast.Node.Index) Inner .field_call_bind, .field_ptr_named, .field_val_named, - .field_call_bind_named, .func, .func_inferred, .int, @@ -2329,6 +2328,7 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: Ast.Node.Index) Inner .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, @@ -7259,6 +7259,7 @@ fn builtinCall( .sqrt => return simpleUnOp(gz, scope, rl, node, .none, params[0], .sqrt), .sin => return simpleUnOp(gz, scope, rl, node, .none, params[0], .sin), .cos => return simpleUnOp(gz, scope, rl, node, .none, params[0], .cos), + .tan => return simpleUnOp(gz, scope, rl, node, .none, params[0], .tan), .exp => return simpleUnOp(gz, scope, rl, node, .none, params[0], .exp), .exp2 => return simpleUnOp(gz, scope, rl, node, .none, params[0], .exp2), .log => return simpleUnOp(gz, scope, rl, node, .none, params[0], .log), @@ -7947,7 +7948,8 @@ fn calleeExpr( if (std.mem.eql(u8, builtin_name, "@field") and params.len == 2) { const lhs = try expr(gz, scope, .ref, params[0]); const field_name = try comptimeExpr(gz, scope, .{ .ty = .const_slice_u8_type }, params[1]); - return gz.addPlNode(.field_call_bind_named, node, Zir.Inst.FieldNamed{ + return gz.addExtendedPayload(.field_call_bind_named, Zir.Inst.FieldNamedNode{ + .node = gz.nodeIndexToRelative(node), .lhs = lhs, .field_name = field_name, }); diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig index 3bf7224fabcd..04cad1935452 100644 --- a/src/BuiltinFn.zig +++ b/src/BuiltinFn.zig @@ -89,6 +89,7 @@ pub const Tag = enum { sqrt, sin, cos, + tan, exp, exp2, log, @@ -771,6 +772,13 @@ pub const list = list: { .param_count = 1, }, }, + .{ + "@tan", + .{ + .tag = .tan, + .param_count = 1, + }, + }, .{ "@exp", .{ diff --git a/src/Liveness.zig b/src/Liveness.zig index be4344ab9063..e606c15b4bb3 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -422,6 +422,7 @@ fn analyzeInst( .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/Sema.zig b/src/Sema.zig index 8abcbd47ed49..3fa0353e9d15 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -743,7 +743,6 @@ fn analyzeBodyInner( .field_val => try sema.zirFieldVal(block, inst), .field_val_named => try sema.zirFieldValNamed(block, inst), .field_call_bind => try sema.zirFieldCallBind(block, inst), - .field_call_bind_named => try sema.zirFieldCallBindNamed(block, inst), .func => try sema.zirFunc(block, inst, false), .func_inferred => try sema.zirFunc(block, inst, true), .import => try sema.zirImport(block, inst), @@ -855,6 +854,7 @@ fn analyzeBodyInner( .sqrt => try sema.zirUnaryMath(block, inst, .sqrt, Value.sqrt), .sin => try sema.zirUnaryMath(block, inst, .sin, Value.sin), .cos => try sema.zirUnaryMath(block, inst, .cos, Value.cos), + .tan => try sema.zirUnaryMath(block, inst, .tan, Value.tan), .exp => try sema.zirUnaryMath(block, inst, .exp, Value.exp), .exp2 => try sema.zirUnaryMath(block, inst, .exp2, Value.exp2), .log => try sema.zirUnaryMath(block, inst, .log, Value.log), @@ -910,35 +910,36 @@ fn analyzeBodyInner( const extended = datas[inst].extended; break :ext switch (extended.opcode) { // zig fmt: off - .func => try sema.zirFuncExtended( block, extended, inst), - .variable => try sema.zirVarExtended( block, extended), - .struct_decl => try sema.zirStructDecl( 
block, extended, inst), - .enum_decl => try sema.zirEnumDecl( block, extended), - .union_decl => try sema.zirUnionDecl( block, extended, inst), - .opaque_decl => try sema.zirOpaqueDecl( block, extended), - .ret_ptr => try sema.zirRetPtr( block, extended), - .ret_type => try sema.zirRetType( block, extended), - .this => try sema.zirThis( block, extended), - .ret_addr => try sema.zirRetAddr( block, extended), - .builtin_src => try sema.zirBuiltinSrc( block, extended), - .error_return_trace => try sema.zirErrorReturnTrace( block, extended), - .frame => try sema.zirFrame( block, extended), - .frame_address => try sema.zirFrameAddress( block, extended), - .alloc => try sema.zirAllocExtended( block, extended), - .builtin_extern => try sema.zirBuiltinExtern( block, extended), - .@"asm" => try sema.zirAsm( block, extended), - .typeof_peer => try sema.zirTypeofPeer( block, extended), - .compile_log => try sema.zirCompileLog( block, extended), - .add_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), - .sub_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), - .mul_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), - .shl_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), - .c_undef => try sema.zirCUndef( block, extended), - .c_include => try sema.zirCInclude( block, extended), - .c_define => try sema.zirCDefine( block, extended), - .wasm_memory_size => try sema.zirWasmMemorySize( block, extended), - .wasm_memory_grow => try sema.zirWasmMemoryGrow( block, extended), - .prefetch => try sema.zirPrefetch( block, extended), + .func => try sema.zirFuncExtended( block, extended, inst), + .variable => try sema.zirVarExtended( block, extended), + .struct_decl => try sema.zirStructDecl( block, extended, inst), + .enum_decl => try sema.zirEnumDecl( block, extended), + .union_decl => try sema.zirUnionDecl( block, extended, inst), + .opaque_decl => try sema.zirOpaqueDecl( block, extended), + .ret_ptr => try sema.zirRetPtr( block, extended), + .ret_type => try sema.zirRetType( block, extended), + .this => try sema.zirThis( block, extended), + .ret_addr => try sema.zirRetAddr( block, extended), + .builtin_src => try sema.zirBuiltinSrc( block, extended), + .error_return_trace => try sema.zirErrorReturnTrace( block, extended), + .frame => try sema.zirFrame( block, extended), + .frame_address => try sema.zirFrameAddress( block, extended), + .alloc => try sema.zirAllocExtended( block, extended), + .builtin_extern => try sema.zirBuiltinExtern( block, extended), + .@"asm" => try sema.zirAsm( block, extended), + .typeof_peer => try sema.zirTypeofPeer( block, extended), + .compile_log => try sema.zirCompileLog( block, extended), + .add_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), + .sub_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), + .mul_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), + .shl_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), + .c_undef => try sema.zirCUndef( block, extended), + .c_include => try sema.zirCInclude( block, extended), + .c_define => try sema.zirCDefine( block, extended), + .wasm_memory_size => try sema.zirWasmMemorySize( block, extended), + .wasm_memory_grow => try sema.zirWasmMemoryGrow( block, extended), + .prefetch => try sema.zirPrefetch( block, extended), + .field_call_bind_named => try sema.zirFieldCallBindNamed(block, 
extended), // zig fmt: on .dbg_block_begin => { dbg_block_begins += 1; @@ -6938,14 +6939,13 @@ fn zirFieldPtrNamed(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErr return sema.fieldPtr(block, src, object_ptr, field_name, field_name_src); } -fn zirFieldCallBindNamed(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { +fn zirFieldCallBindNamed(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!Air.Inst.Ref { const tracy = trace(@src()); defer tracy.end(); - const inst_data = sema.code.instructions.items(.data)[inst].pl_node; - const src = inst_data.src(); - const field_name_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node }; - const extra = sema.code.extraData(Zir.Inst.FieldNamed, inst_data.payload_index).data; + const extra = sema.code.extraData(Zir.Inst.FieldNamedNode, extended.operand).data; + const src: LazySrcLoc = .{ .node_offset = extra.node }; + const field_name_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = extra.node }; const object_ptr = sema.resolveInst(extra.lhs); const field_name = try sema.resolveConstString(block, field_name_src, extra.field_name); return sema.fieldCallBind(block, src, object_ptr, field_name, field_name_src); @@ -14051,7 +14051,7 @@ fn zirFloatToInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError! const result_val = val.floatToInt(sema.arena, operand_ty, dest_ty, target) catch |err| switch (err) { error.FloatCannotFit => { return sema.fail(block, operand_src, "integer value {d} cannot be stored in type '{}'", .{ - std.math.floor(val.toFloat(f64)), + @floor(val.toFloat(f64)), dest_ty.fmt(sema.mod), }); }, @@ -18371,7 +18371,7 @@ fn coerce( } const result_val = val.floatToInt(sema.arena, inst_ty, dest_ty, target) catch |err| switch (err) { error.FloatCannotFit => { - return sema.fail(block, inst_src, "integer value {d} cannot be stored in type '{}'", .{ std.math.floor(val.toFloat(f64)), dest_ty.fmt(sema.mod) }); + return sema.fail(block, inst_src, "integer value {d} cannot be stored in type '{}'", .{ @floor(val.toFloat(f64)), dest_ty.fmt(sema.mod) }); }, else => |e| return e, }; diff --git a/src/Zir.zig b/src/Zir.zig index 8fe527679242..f4c62a6f24b4 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -407,15 +407,6 @@ pub const Inst = struct { /// The field name is a comptime instruction. Used by @field. /// Uses `pl_node` field. The AST node is the builtin call. Payload is FieldNamed. field_val_named, - /// Given a pointer to a struct or object that contains virtual fields, returns the - /// named field. If there is no named field, searches in the type for a decl that - /// matches the field name. The decl is resolved and we ensure that it's a function - /// which can accept the object as the first parameter, with one pointer fixup. If - /// all of that works, this instruction produces a special "bound function" value - /// which contains both the function and the saved first parameter value. - /// Bound functions may only be used as the function parameter to a `call` or - /// `builtin_call` instruction. Any other use is invalid zir and may crash the compiler. - field_call_bind_named, /// Returns a function type, or a function instance, depending on whether /// the body_len is 0. Calling convention is auto. /// Uses the `pl_node` union field. `payload_index` points to a `Func`. @@ -797,6 +788,8 @@ pub const Inst = struct { sin, /// Implement builtin `@cos`. Uses `un_node`. cos, + /// Implement builtin `@tan`. Uses `un_node`. 
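Taken together with the AstGen/Sema wiring above, `@tan` becomes usable exactly like the other unary float builtins; a hypothetical smoke test (not part of this diff):

    const std = @import("std");
    const expect = std.testing.expect;

    test "@tan smoke test" {
        // comptime-known operand: folded in Sema through Value.tan
        try expect(@tan(@as(f64, 0.0)) == 0.0);

        // runtime-known operand: lowered through the new ZIR/AIR `tan` instruction
        var x: f32 = 0.0;
        try expect(@tan(x) == 0.0);
    }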
+ tan, /// Implement builtin `@exp`. Uses `un_node`. exp, /// Implement builtin `@exp2`. Uses `un_node`. @@ -1069,7 +1062,6 @@ pub const Inst = struct { .field_call_bind, .field_ptr_named, .field_val_named, - .field_call_bind_named, .func, .func_inferred, .has_decl, @@ -1179,6 +1171,7 @@ pub const Inst = struct { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, @@ -1358,7 +1351,6 @@ pub const Inst = struct { .field_call_bind, .field_ptr_named, .field_val_named, - .field_call_bind_named, .func, .func_inferred, .has_decl, @@ -1451,6 +1443,7 @@ pub const Inst = struct { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, @@ -1607,7 +1600,6 @@ pub const Inst = struct { .field_ptr_named = .pl_node, .field_val_named = .pl_node, .field_call_bind = .pl_node, - .field_call_bind_named = .pl_node, .func = .pl_node, .func_inferred = .pl_node, .import = .str_tok, @@ -1713,6 +1705,7 @@ pub const Inst = struct { .sqrt = .un_node, .sin = .un_node, .cos = .un_node, + .tan = .un_node, .exp = .un_node, .exp2 = .un_node, .log = .un_node, @@ -1928,6 +1921,16 @@ pub const Inst = struct { dbg_block_begin, /// Marks the end of a semantic scope for debug info variables. dbg_block_end, + /// Given a pointer to a struct or object that contains virtual fields, returns the + /// named field. If there is no named field, searches in the type for a decl that + /// matches the field name. The decl is resolved and we ensure that it's a function + /// which can accept the object as the first parameter, with one pointer fixup. If + /// all of that works, this instruction produces a special "bound function" value + /// which contains both the function and the saved first parameter value. + /// Bound functions may only be used as the function parameter to a `call` or + /// `builtin_call` instruction. Any other use is invalid zir and may crash the compiler. + /// Uses `pl_node` field. The AST node is the `@field` builtin. Payload is FieldNamedNode. 
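The bound-function behavior being preserved here is easiest to see from user code; a hypothetical example of the only legal use, calling the result directly:

    const std = @import("std");

    const Point = struct {
        x: i32,

        fn getX(self: Point) i32 {
            return self.x;
        }
    };

    test "method call through @field" {
        const p = Point{ .x = 7 };
        // No field is named "getX", so the decl is found instead and a
        // bound function (callee + saved first argument) is produced.
        try std.testing.expect(@field(p, "getX")() == 7);
    }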
+ field_call_bind_named, pub const InstData = struct { opcode: Extended, @@ -2963,6 +2966,12 @@ pub const Inst = struct { field_name: Ref, }; + pub const FieldNamedNode = struct { + node: i32, + lhs: Ref, + field_name: Ref, + }; + pub const As = struct { dest_type: Ref, operand: Ref, diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index fc37ae00dd8a..5ed7b63db3ce 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -533,6 +533,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 54de05347576..73f51f64816d 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -571,6 +571,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 15377378cd81..61fddee2079b 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -500,6 +500,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/arch/sparcv9/CodeGen.zig b/src/arch/sparcv9/CodeGen.zig index 7e1ecefbb760..bcd8cf8eebe1 100644 --- a/src/arch/sparcv9/CodeGen.zig +++ b/src/arch/sparcv9/CodeGen.zig @@ -451,6 +451,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 8eadfe6cd803..5171dfb46071 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1559,6 +1559,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 40973529752b..0103f5382f21 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -656,6 +656,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 464f144f5acf..46fee271cce7 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -1749,6 +1749,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index b2abc536ef7d..c9ea5bebacc3 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -3518,19 +3518,20 @@ pub const FuncGen = struct { .shr => try self.airShr(inst, false), .shr_exact => try self.airShr(inst, true), - .sqrt => try self.airUnaryOp(inst, "sqrt"), - .sin => try self.airUnaryOp(inst, "sin"), - .cos => try self.airUnaryOp(inst, "cos"), - .exp => try self.airUnaryOp(inst, "exp"), - .exp2 => try self.airUnaryOp(inst, "exp2"), - .log => try self.airUnaryOp(inst, "log"), - .log2 => try self.airUnaryOp(inst, "log2"), - .log10 => try self.airUnaryOp(inst, "log10"), - .fabs => try self.airUnaryOp(inst, "fabs"), - .floor => try self.airUnaryOp(inst, "floor"), - .ceil => try self.airUnaryOp(inst, "ceil"), - .round => try self.airUnaryOp(inst, "round"), - .trunc_float => try self.airUnaryOp(inst, "trunc"), + .sqrt => try self.airUnaryOp(inst, .sqrt), + .sin => try self.airUnaryOp(inst, .sin), + .cos => try self.airUnaryOp(inst, .cos), + .tan => try self.airUnaryOp(inst, .tan), + .exp => try self.airUnaryOp(inst, 
.exp), + .exp2 => try self.airUnaryOp(inst, .exp2), + .log => try self.airUnaryOp(inst, .log), + .log2 => try self.airUnaryOp(inst, .log2), + .log10 => try self.airUnaryOp(inst, .log10), + .fabs => try self.airUnaryOp(inst, .fabs), + .floor => try self.airUnaryOp(inst, .floor), + .ceil => try self.airUnaryOp(inst, .ceil), + .round => try self.airUnaryOp(inst, .round), + .trunc_float => try self.airUnaryOp(inst, .trunc), .cmp_eq => try self.airCmp(inst, .eq), .cmp_gt => try self.airCmp(inst, .gt), @@ -3905,7 +3906,7 @@ pub const FuncGen = struct { rhs: *const llvm.Value, operand_ty: Type, op: math.CompareOperator, - ) *const llvm.Value { + ) Allocator.Error!*const llvm.Value { var int_buffer: Type.Payload.Bits = undefined; var opt_buffer: Type.Payload.ElemType = undefined; @@ -3947,7 +3948,7 @@ pub const FuncGen = struct { self.builder.positionBuilderAtEnd(both_pl_block); const lhs_payload = self.optPayloadHandle(lhs, is_by_ref); const rhs_payload = self.optPayloadHandle(rhs, is_by_ref); - const payload_cmp = self.cmp(lhs_payload, rhs_payload, payload_ty, op); + const payload_cmp = try self.cmp(lhs_payload, rhs_payload, payload_ty, op); _ = self.builder.buildBr(end_block); const both_pl_block_end = self.builder.getInsertBlock(); @@ -3983,17 +3984,7 @@ pub const FuncGen = struct { ); return phi_node; }, - .Float => { - const operation: llvm.RealPredicate = switch (op) { - .eq => .OEQ, - .neq => .UNE, - .lt => .OLT, - .lte => .OLE, - .gt => .OGT, - .gte => .OGE, - }; - return self.builder.buildFCmp(operation, lhs, rhs, ""); - }, + .Float => return self.buildFloatCmp(op, operand_ty, .{ lhs, rhs }), else => unreachable, }; const is_signed = int_ty.isSignedInt(); @@ -5221,7 +5212,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isAnyFloat()) return self.builder.buildFAdd(lhs, rhs, ""); + if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.add, inst_ty, 2, .{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildNSWAdd(lhs, rhs, ""); return self.builder.buildNUWAdd(lhs, rhs, ""); } @@ -5260,7 +5251,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isAnyFloat()) return self.builder.buildFSub(lhs, rhs, ""); + if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.sub, inst_ty, 2, .{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildNSWSub(lhs, rhs, ""); return self.builder.buildNUWSub(lhs, rhs, ""); } @@ -5298,7 +5289,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isAnyFloat()) return self.builder.buildFMul(lhs, rhs, ""); + if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.mul, inst_ty, 2, .{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildNSWMul(lhs, rhs, ""); return self.builder.buildNUWMul(lhs, rhs, ""); } @@ -5333,8 +5324,9 @@ pub const FuncGen = struct { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); + const inst_ty = self.air.typeOfIndex(inst); - return self.builder.buildFDiv(lhs, rhs, ""); + return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs }); } fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { @@ -5347,8 +5339,8 @@ pub const FuncGen = struct { const scalar_ty = inst_ty.scalarType(); if (scalar_ty.isRuntimeFloat()) { - const result = 
self.builder.buildFDiv(lhs, rhs, "");
-            return self.callTrunc(result, inst_ty);
+            const result = try self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs });
+            return self.buildFloatOp(.trunc, inst_ty, 1, .{result});
         }
         if (scalar_ty.isSignedInt()) return self.builder.buildSDiv(lhs, rhs, "");
         return self.builder.buildUDiv(lhs, rhs, "");
@@ -5364,8 +5356,8 @@
         const scalar_ty = inst_ty.scalarType();
 
         if (scalar_ty.isRuntimeFloat()) {
-            const result = self.builder.buildFDiv(lhs, rhs, "");
-            return try self.callFloor(result, inst_ty);
+            const result = try self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs });
+            return self.buildFloatOp(.floor, inst_ty, 1, .{result});
         }
         if (scalar_ty.isSignedInt()) {
             // const d = @divTrunc(a, b);
@@ -5395,7 +5387,7 @@
         const inst_ty = self.air.typeOfIndex(inst);
         const scalar_ty = inst_ty.scalarType();
 
-        if (scalar_ty.isRuntimeFloat()) return self.builder.buildFDiv(lhs, rhs, "");
+        if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs });
         if (scalar_ty.isSignedInt()) return self.builder.buildExactSDiv(lhs, rhs, "");
         return self.builder.buildExactUDiv(lhs, rhs, "");
     }
@@ -5409,7 +5401,7 @@
         const inst_ty = self.air.typeOfIndex(inst);
         const scalar_ty = inst_ty.scalarType();
 
-        if (scalar_ty.isRuntimeFloat()) return self.builder.buildFRem(lhs, rhs, "");
+        if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.fmod, inst_ty, 2, .{ lhs, rhs });
         if (scalar_ty.isSignedInt()) return self.builder.buildSRem(lhs, rhs, "");
         return self.builder.buildURem(lhs, rhs, "");
     }
@@ -5425,11 +5417,11 @@
         const scalar_ty = inst_ty.scalarType();
 
         if (scalar_ty.isRuntimeFloat()) {
-            const a = self.builder.buildFRem(lhs, rhs, "");
-            const b = self.builder.buildFAdd(a, rhs, "");
-            const c = self.builder.buildFRem(b, rhs, "");
+            const a = try self.buildFloatOp(.fmod, inst_ty, 2, .{ lhs, rhs });
+            const b = try self.buildFloatOp(.add, inst_ty, 2, .{ a, rhs });
+            const c = try self.buildFloatOp(.fmod, inst_ty, 2, .{ b, rhs });
             const zero = inst_llvm_ty.constNull();
-            const ltz = self.builder.buildFCmp(.OLT, lhs, zero, "");
+            const ltz = try self.buildFloatCmp(.lt, inst_ty, .{ lhs, zero });
             return self.builder.buildSelect(ltz, c, a, "");
         }
         if (scalar_ty.isSignedInt()) {
@@ -5508,75 +5500,266 @@
         return result_struct;
     }
 
-    fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
-        if (self.liveness.isUnused(inst)) return null;
+    fn buildElementwiseCall(
+        self: *FuncGen,
+        llvm_fn: *const llvm.Value,
+        args_vectors: []const *const llvm.Value,
+        result_vector: *const llvm.Value,
+        vector_len: usize,
+    ) !*const llvm.Value {
+        const args_len = @intCast(c_uint, args_vectors.len);
+        const llvm_i32 = self.context.intType(32);
+        assert(args_len <= 3);
 
-        const pl_op = self.air.instructions.items(.data)[inst].pl_op;
-        const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+        var i: usize = 0;
+        var result = result_vector;
+        while (i < vector_len) : (i += 1) {
+            const index_i32 = llvm_i32.constInt(i, .False);
 
-        const mulend1 = try self.resolveInst(extra.lhs);
-        const mulend2 = try self.resolveInst(extra.rhs);
-        const addend = try self.resolveInst(pl_op.operand);
+            var args: [3]*const llvm.Value = undefined;
+            for (args_vectors) |arg_vector, k| {
+                args[k] = self.builder.buildExtractElement(arg_vector, index_i32, "");
+            }
+            const result_elem = self.builder.buildCall(llvm_fn, &args, args_len, .C, .Auto, "");
+            result = self.builder.buildInsertElement(result, result_elem, index_i32, "");
+        }
+        return result;
+    }
 
-        const ty = self.air.typeOfIndex(inst);
-        const llvm_ty = try self.dg.llvmType(ty);
-        const scalar_ty = ty.scalarType();
-        const target = self.dg.module.getTarget();
+    fn getLibcFunction(
+        self: *FuncGen,
+        fn_name: [:0]const u8,
+        param_types: []const *const llvm.Type,
+        return_type: *const llvm.Type,
+    ) *const llvm.Value {
+        return self.dg.object.llvm_module.getNamedFunction(fn_name.ptr) orelse b: {
+            const alias = self.dg.object.llvm_module.getNamedGlobalAlias(fn_name.ptr, fn_name.len);
+            break :b if (alias) |a| a.getAliasee() else null;
+        } orelse b: {
+            const params_len = @intCast(c_uint, param_types.len);
+            const fn_type = llvm.functionType(return_type, param_types.ptr, params_len, .False);
+            const f = self.dg.object.llvm_module.addFunction(fn_name, fn_type);
+            break :b f;
+        };
+    }
+
+    fn libcFloatPrefix(float_bits: u16) []const u8 {
+        return switch (float_bits) {
+            16, 80 => "__",
+            32, 64, 128 => "",
+            else => unreachable,
+        };
+    }
 
-        const Strat = union(enum) {
-            intrinsic,
-            libc: [*:0]const u8,
+    fn libcFloatSuffix(float_bits: u16) []const u8 {
+        return switch (float_bits) {
+            16 => "h", // Non-standard
+            32 => "f",
+            64 => "",
+            80 => "x", // Non-standard
+            128 => "q", // Non-standard (mimics convention in GCC libquadmath)
+            else => unreachable,
         };
+    }
 
-        const strat: Strat = switch (scalar_ty.floatBits(target)) {
-            16, 32, 64 => Strat.intrinsic,
-            80 => if (CType.longdouble.sizeInBits(target) == 80) Strat{ .intrinsic = {} } else Strat{ .libc = "__fmax" },
-            // LLVM always lowers the fma builtin for f128 to fmal, which is for `long double`.
-            // On some targets this will be correct; on others it will be incorrect.
-            128 => if (CType.longdouble.sizeInBits(target) == 128) Strat{ .intrinsic = {} } else Strat{ .libc = "fmaq" },
+    fn compilerRtFloatAbbrev(float_bits: u16) []const u8 {
+        return switch (float_bits) {
+            16 => "h",
+            32 => "s",
+            64 => "d",
+            80 => "x",
+            128 => "t",
             else => unreachable,
         };
+    }
 
-        switch (strat) {
-            .intrinsic => {
-                const llvm_fn = self.getIntrinsic("llvm.fma", &.{llvm_ty});
-                const params = [_]*const llvm.Value{ mulend1, mulend2, addend };
-                return self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, "");
-            },
-            .libc => |fn_name| {
-                const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
-                const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse b: {
-                    const param_types = [_]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty, scalar_llvm_ty };
-                    const fn_type = llvm.functionType(scalar_llvm_ty, &param_types, param_types.len, .False);
-                    break :b self.dg.object.llvm_module.addFunction(fn_name, fn_type);
-                };
+    /// Creates a floating point comparison by lowering to the appropriate
+    /// hardware instruction or softfloat routine for the target
+    fn buildFloatCmp(
+        self: *FuncGen,
+        pred: math.CompareOperator,
+        ty: Type,
+        params: [2]*const llvm.Value,
+    ) !*const llvm.Value {
+        const target = self.dg.module.getTarget();
+        const scalar_ty = ty.scalarType();
+        const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
 
-                if (ty.zigTypeTag() == .Vector) {
-                    const llvm_i32 = self.context.intType(32);
-                    const vector_llvm_ty = try self.dg.llvmType(ty);
+        if (intrinsicsAllowed(scalar_ty, target)) {
+            const llvm_predicate: llvm.RealPredicate = switch (pred) {
+                .eq => .OEQ,
+                .neq => .UNE,
+                .lt => .OLT,
+                .lte => .OLE,
+                .gt => .OGT,
+                .gte => .OGE,
+            };
+            return self.builder.buildFCmp(llvm_predicate, params[0], params[1], "");
+        }
+
+        const float_bits = scalar_ty.floatBits(target);
+        const compiler_rt_float_abbrev = compilerRtFloatAbbrev(float_bits);
+        var fn_name_buf: [64]u8 = undefined;
+        const fn_base_name = switch (pred) {
+            .neq => "ne",
+            .eq => "eq",
+            .lt => "lt",
+            .lte => "le",
+            .gt => "gt",
+            .gte => "ge",
+        };
+        const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f2", .{
+            fn_base_name, compiler_rt_float_abbrev,
+        }) catch unreachable;
 
-                    var i: usize = 0;
-                    var vector = vector_llvm_ty.getUndef();
-                    while (i < ty.vectorLen()) : (i += 1) {
-                        const index_i32 = llvm_i32.constInt(i, .False);
+        const param_types = [2]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty };
+        const llvm_i32 = self.context.intType(32);
+        const libc_fn = self.getLibcFunction(fn_name, param_types[0..], llvm_i32);
 
-                        const mulend1_elem = self.builder.buildExtractElement(mulend1, index_i32, "");
-                        const mulend2_elem = self.builder.buildExtractElement(mulend2, index_i32, "");
-                        const addend_elem = self.builder.buildExtractElement(addend, index_i32, "");
+        const zero = llvm_i32.constInt(0, .False);
+        const int_pred: llvm.IntPredicate = switch (pred) {
+            .eq => .EQ,
+            .neq => .NE,
+            .lt => .SLT,
+            .lte => .SLE,
+            .gt => .SGT,
+            .gte => .SGE,
+        };
 
-                        const params = [_]*const llvm.Value{ mulend1_elem, mulend2_elem, addend_elem };
-                        const mul_add = self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, "");
+        if (ty.zigTypeTag() == .Vector) {
+            const vec_len = ty.vectorLen();
+            const vector_result_ty = llvm_i32.vectorType(vec_len);
+
+            var result = vector_result_ty.getUndef();
+            result = try self.buildElementwiseCall(libc_fn, &params, result, vec_len);
+
+            const zero_vector = self.builder.buildVectorSplat(vec_len, zero, "");
+            return self.builder.buildICmp(int_pred, result, zero_vector, "");
+        }
+
+        const result = self.builder.buildCall(libc_fn, &params, params.len, .C, .Auto, "");
+        return self.builder.buildICmp(int_pred, result, zero, "");
+    }
+
+    const FloatOp = enum {
+        add,
+        ceil,
+        cos,
+        div,
+        exp,
+        exp2,
+        fabs,
+        floor,
+        fma,
+        log,
+        log10,
+        log2,
+        fmax,
+        fmin,
+        mul,
+        fmod,
+        round,
+        sin,
+        sqrt,
+        sub,
+        tan,
+        trunc,
+    };
 
-                        vector = self.builder.buildInsertElement(vector, mul_add, index_i32, "");
-                    }
+    const FloatOpStrat = union(enum) {
+        intrinsic: []const u8,
+        libc: [:0]const u8,
+    };
 
-                    return vector;
-                } else {
-                    const params = [_]*const llvm.Value{ mulend1, mulend2, addend };
-                    return self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, "");
-                }
-            },
-        }
+    /// Creates a floating point operation (add, sub, fma, sqrt, exp, etc.)
+    /// by lowering to the appropriate hardware instruction or softfloat
+    /// routine for the target
+    fn buildFloatOp(
+        self: *FuncGen,
+        comptime op: FloatOp,
+        ty: Type,
+        comptime params_len: usize,
+        params: [params_len]*const llvm.Value,
+    ) !*const llvm.Value {
+        const target = self.dg.module.getTarget();
+        const scalar_ty = ty.scalarType();
+        const llvm_ty = try self.dg.llvmType(ty);
+        const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
+
+        const intrinsics_allowed = op != .tan and intrinsicsAllowed(scalar_ty, target);
+        var fn_name_buf: [64]u8 = undefined;
+        const strat: FloatOpStrat = if (intrinsics_allowed) switch (op) {
+            // Some operations are dedicated LLVM instructions, not available as intrinsics
+            .add => return self.builder.buildFAdd(params[0], params[1], ""),
+            .sub => return self.builder.buildFSub(params[0], params[1], ""),
+            .mul => return self.builder.buildFMul(params[0], params[1], ""),
+            .div => return self.builder.buildFDiv(params[0], params[1], ""),
+            .fmod => return self.builder.buildFRem(params[0], params[1], ""),
+            .fmax => return self.builder.buildMaxNum(params[0], params[1], ""),
+            .fmin => return self.builder.buildMinNum(params[0], params[1], ""),
+            else => .{ .intrinsic = "llvm." ++ @tagName(op) },
+        } else b: {
+            const float_bits = scalar_ty.floatBits(target);
+            break :b switch (op) {
+                .add, .sub, .div, .mul => FloatOpStrat{
+                    .libc = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f3", .{
+                        @tagName(op), compilerRtFloatAbbrev(float_bits),
+                    }) catch unreachable,
+                },
+                .ceil,
+                .cos,
+                .exp,
+                .exp2,
+                .fabs,
+                .floor,
+                .fma,
+                .fmax,
+                .fmin,
+                .fmod,
+                .log,
+                .log10,
+                .log2,
+                .round,
+                .sin,
+                .sqrt,
+                .tan,
+                .trunc,
+                => FloatOpStrat{
+                    .libc = std.fmt.bufPrintZ(&fn_name_buf, "{s}{s}{s}", .{
+                        libcFloatPrefix(float_bits), @tagName(op), libcFloatSuffix(float_bits),
+                    }) catch unreachable,
+                },
+            };
+        };
+
+        const llvm_fn: *const llvm.Value = switch (strat) {
+            .intrinsic => |fn_name| self.getIntrinsic(fn_name, &.{llvm_ty}),
+            .libc => |fn_name| b: {
+                const param_types = [3]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty, scalar_llvm_ty };
+                const libc_fn = self.getLibcFunction(fn_name, param_types[0..params.len], scalar_llvm_ty);
+                if (ty.zigTypeTag() == .Vector) {
+                    const result = llvm_ty.getUndef();
+                    return self.buildElementwiseCall(libc_fn, &params, result, ty.vectorLen());
                 }
+
+                break :b libc_fn;
             },
-        }
+        };
+        return self.builder.buildCall(llvm_fn, &params, params_len, .C, .Auto, "");
+    }
+
+    fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+        if (self.liveness.isUnused(inst)) return null;
+
+        const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+        const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+
+        const mulend1 = try self.resolveInst(extra.lhs);
+        const mulend2 = try self.resolveInst(extra.rhs);
+        const addend = try self.resolveInst(pl_op.operand);
+
+        const ty = self.air.typeOfIndex(inst);
+        return self.buildFloatOp(.fma, ty, 3, .{ mulend1, mulend2, addend });
     }
 
     fn airShlWithOverflow(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
@@ -6381,14 +6564,14 @@
         }
     }
 
-    fn airUnaryOp(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value {
+    fn airUnaryOp(self: *FuncGen, inst: Air.Inst.Index, comptime op: FloatOp) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
 
         const un_op = self.air.instructions.items(.data)[inst].un_op;
         const operand = try self.resolveInst(un_op);
         const operand_ty = self.air.typeOf(un_op);
 
-        return self.callFloatUnary(operand, operand_ty, llvm_fn_name);
+        return self.buildFloatOp(op, operand_ty, 1, .{operand});
     }
 
     fn airClzCtz(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value {
@@ -6652,17 +6835,9 @@
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const scalar = try self.resolveInst(ty_op.operand);
-        const scalar_ty = self.air.typeOf(ty_op.operand);
         const vector_ty = self.air.typeOfIndex(inst);
         const len = vector_ty.vectorLen();
-        const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
-        const op_llvm_ty = scalar_llvm_ty.vectorType(1);
-        const u32_llvm_ty = self.context.intType(32);
-        const mask_llvm_ty = u32_llvm_ty.vectorType(len);
-        const undef_vector = op_llvm_ty.getUndef();
-        const u32_zero = u32_llvm_ty.constNull();
-        const op_vector = self.builder.buildInsertElement(undef_vector, scalar, u32_zero, "");
-        return self.builder.buildShuffleVector(op_vector, undef_vector, mask_llvm_ty.constNull(), "");
+        return self.builder.buildVectorSplat(len, scalar, "");
     }
 
     fn airSelect(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
@@ -7191,48 +7366,6 @@
         return self.builder.buildExtractValue(opt_handle, 0, "");
     }
 
-    fn callFloor(self: *FuncGen, arg: *const llvm.Value, ty: Type) !*const llvm.Value {
-        return self.callFloatUnary(arg, ty, "floor");
-    }
-
-    fn callCeil(self: *FuncGen, arg: *const llvm.Value, ty: Type) !*const llvm.Value {
-        return self.callFloatUnary(arg, ty, "ceil");
-    }
-
-    fn callTrunc(self: *FuncGen, arg: *const llvm.Value, ty: Type) !*const llvm.Value {
-        return self.callFloatUnary(arg, ty, "trunc");
-    }
-
-    fn callFloatUnary(
-        self: *FuncGen,
-        arg: *const llvm.Value,
-        ty: Type,
-        name: []const u8,
-    ) !*const llvm.Value {
-        const target = self.dg.module.getTarget();
-
-        var fn_name_buf: [100]u8 = undefined;
-        const llvm_fn_name = switch (ty.zigTypeTag()) {
-            .Vector => std.fmt.bufPrintZ(&fn_name_buf, "llvm.{s}.v{d}f{d}", .{
-                name, ty.vectorLen(), ty.childType().floatBits(target),
-            }) catch unreachable,
-            .Float => std.fmt.bufPrintZ(&fn_name_buf, "llvm.{s}.f{d}", .{
-                name, ty.floatBits(target),
-            }) catch unreachable,
-            else => unreachable,
-        };
-
-        const llvm_fn = self.dg.object.llvm_module.getNamedFunction(llvm_fn_name) orelse blk: {
-            const operand_llvm_ty = try self.dg.llvmType(ty);
-            const param_types = [_]*const llvm.Type{operand_llvm_ty};
-            const fn_type = llvm.functionType(operand_llvm_ty, &param_types, param_types.len, .False);
-            break :blk self.dg.object.llvm_module.addFunction(llvm_fn_name, fn_type);
-        };
-
-        const args: [1]*const llvm.Value = .{arg};
-        return self.builder.buildCall(llvm_fn, &args, args.len, .C, .Auto, "");
-    }
-
     fn fieldPtr(
         self: *FuncGen,
         inst: Air.Inst.Index,
@@ -8055,6 +8188,26 @@ fn backendSupportsF80(target: std.Target) bool {
     };
 }
 
+/// This function returns true if we expect LLVM to lower f16 correctly
+/// and false if we expect LLVM to crash if it encounters an f16 type or
+/// if it produces miscompilations.
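To make the fallback naming concrete: a condensed sketch (hypothetical helper mirroring compilerRtFloatAbbrev/libcFloatPrefix/libcFloatSuffix above, not code from this change) of which symbol a float op resolves to when intrinsics are not allowed:

    const std = @import("std");

    fn fallbackName(buf: []u8, op: []const u8, bits: u16, is_arith: bool) ![]u8 {
        const abbrev: []const u8 = switch (bits) {
            16 => "h", 32 => "s", 64 => "d", 80 => "x", 128 => "t",
            else => unreachable,
        };
        // Arithmetic goes to compiler-rt, e.g. f80 add -> "__addxf3".
        if (is_arith) return std.fmt.bufPrint(buf, "__{s}{s}f3", .{ op, abbrev });
        // Everything else goes to a libm-style routine, e.g. f128 tan -> "tanq",
        // f80 sin -> "__sinx", f32 fmod -> "fmodf".
        const prefix: []const u8 = if (bits == 16 or bits == 80) "__" else "";
        const suffix: []const u8 = switch (bits) {
            16 => "h", 32 => "f", 64 => "", 80 => "x", 128 => "q",
            else => unreachable,
        };
        return std.fmt.bufPrint(buf, "{s}{s}{s}", .{ prefix, op, suffix });
    }

    test "fallback names" {
        var buf: [64]u8 = undefined;
        try std.testing.expectEqualStrings("__addxf3", try fallbackName(&buf, "add", 80, true));
        try std.testing.expectEqualStrings("tanq", try fallbackName(&buf, "tan", 128, false));
    }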
+fn backendSupportsF16(target: std.Target) bool { + return switch (target.cpu.arch) { + else => true, + }; +} + +/// LLVM does not support all relevant intrinsics for all targets, so we +/// may need to manually generate a libc call +fn intrinsicsAllowed(scalar_ty: Type, target: std.Target) bool { + return switch (scalar_ty.tag()) { + .f16 => backendSupportsF16(target), + .f80 => target.longDoubleIs(f80) and backendSupportsF80(target), + .f128 => target.longDoubleIs(f128), + else => true, + }; +} + /// We need to insert extra padding if LLVM's isn't enough. /// However we don't want to ever call LLVMABIAlignmentOfType or /// LLVMABISizeOfType because these functions will trip assertions diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig index 3863385a0686..b8dc3e183081 100644 --- a/src/codegen/llvm/bindings.zig +++ b/src/codegen/llvm/bindings.zig @@ -675,6 +675,14 @@ pub const Builder = opaque { Name: [*:0]const u8, ) *const Value; + pub const buildVectorSplat = LLVMBuildVectorSplat; + extern fn LLVMBuildVectorSplat( + *const Builder, + ElementCount: c_uint, + EltVal: *const Value, + Name: [*:0]const u8, + ) *const Value; + pub const buildPtrToInt = LLVMBuildPtrToInt; extern fn LLVMBuildPtrToInt( *const Builder, diff --git a/src/print_air.zig b/src/print_air.zig index 27d222f2623f..6e336e138b20 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -158,6 +158,7 @@ const Writer = struct { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/print_zir.zig b/src/print_zir.zig index e85e69fe7f6f..776aeffbdc3c 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -207,6 +207,7 @@ const Writer = struct { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, @@ -400,7 +401,6 @@ const Writer = struct { .field_ptr_named, .field_val_named, - .field_call_bind_named, => try self.writePlNodeFieldNamed(stream, inst), .as_node => try self.writeAs(stream, inst), @@ -509,6 +509,16 @@ const Writer = struct { try stream.writeAll(")) "); try self.writeSrc(stream, src); }, + + .field_call_bind_named => { + const extra = self.code.extraData(Zir.Inst.FieldNamedNode, extended.operand).data; + const src: LazySrcLoc = .{ .node_offset = extra.node }; + try self.writeInstRef(stream, extra.lhs); + try stream.writeAll(", "); + try self.writeInstRef(stream, extra.field_name); + try stream.writeAll(") "); + try self.writeSrc(stream, src); + }, } } diff --git a/src/stage1/all_types.hpp b/src/stage1/all_types.hpp index cbefcd107818..398693e6d813 100644 --- a/src/stage1/all_types.hpp +++ b/src/stage1/all_types.hpp @@ -1768,6 +1768,7 @@ enum BuiltinFnId { BuiltinFnIdSqrt, BuiltinFnIdSin, BuiltinFnIdCos, + BuiltinFnIdTan, BuiltinFnIdExp, BuiltinFnIdExp2, BuiltinFnIdLog, diff --git a/src/stage1/analyze.cpp b/src/stage1/analyze.cpp index 15a8fdf81e2e..aef4966ee71f 100644 --- a/src/stage1/analyze.cpp +++ b/src/stage1/analyze.cpp @@ -8928,7 +8928,7 @@ static void resolve_llvm_types_struct(CodeGen *g, ZigType *struct_type, ResolveS assert(next_offset >= llvm_next_offset); if (next_offset > llvm_next_offset) { - size_t pad_bytes = next_offset - (field->offset + LLVMStoreSizeOfType(g->target_data_ref, llvm_type)); + size_t pad_bytes = next_offset - (field->offset + LLVMABISizeOfType(g->target_data_ref, llvm_type)); if (pad_bytes != 0) { LLVMTypeRef pad_llvm_type = LLVMArrayType(LLVMInt8Type(), pad_bytes); element_types[gen_field_index] = pad_llvm_type; @@ -10375,7 +10375,7 @@ void ZigValue::dump() { // float ops that take a single argument //TODO Powi, Pow, minnum, maxnum, maximum, minimum, 
copysign, lround, llround, lrint, llrint -const char *float_op_to_name(BuiltinFnId op) { +const char *float_un_op_to_name(BuiltinFnId op) { switch (op) { case BuiltinFnIdSqrt: return "sqrt"; @@ -10383,6 +10383,8 @@ const char *float_op_to_name(BuiltinFnId op) { return "sin"; case BuiltinFnIdCos: return "cos"; + case BuiltinFnIdTan: + return "tan"; case BuiltinFnIdExp: return "exp"; case BuiltinFnIdExp2: @@ -10405,6 +10407,8 @@ const char *float_op_to_name(BuiltinFnId op) { return "nearbyint"; case BuiltinFnIdRound: return "round"; + case BuiltinFnIdMulAdd: + return "fma"; default: zig_unreachable(); } diff --git a/src/stage1/analyze.hpp b/src/stage1/analyze.hpp index 6d584ff36152..64e0e199f877 100644 --- a/src/stage1/analyze.hpp +++ b/src/stage1/analyze.hpp @@ -307,7 +307,7 @@ void copy_const_val(CodeGen *g, ZigValue *dest, ZigValue *src); bool type_has_optional_repr(ZigType *ty); bool is_opt_err_set(ZigType *ty); bool type_is_numeric(ZigType *ty); -const char *float_op_to_name(BuiltinFnId op); +const char *float_un_op_to_name(BuiltinFnId op); #define src_assert(OK, SOURCE_NODE) src_assert_impl((OK), (SOURCE_NODE), __FILE__, __LINE__) diff --git a/src/stage1/astgen.cpp b/src/stage1/astgen.cpp index 35566e214302..367bed69cf0a 100644 --- a/src/stage1/astgen.cpp +++ b/src/stage1/astgen.cpp @@ -4497,6 +4497,7 @@ static Stage1ZirInst *astgen_builtin_fn_call(Stage1AstGen *ag, Scope *scope, Ast case BuiltinFnIdSqrt: case BuiltinFnIdSin: case BuiltinFnIdCos: + case BuiltinFnIdTan: case BuiltinFnIdExp: case BuiltinFnIdExp2: case BuiltinFnIdLog: diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index a2efed6bdea0..9d46a660bcab 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -869,7 +869,7 @@ static LLVMValueRef get_float_fn(CodeGen *g, ZigType *type_entry, ZigLLVMFnId fn name = "fma"; num_args = 3; } else if (fn_id == ZigLLVMFnIdFloatOp) { - name = float_op_to_name(op); + name = float_un_op_to_name(op); num_args = 1; } else { zig_unreachable(); @@ -1604,8 +1604,57 @@ static LLVMValueRef gen_assert_zero(CodeGen *g, LLVMValueRef expr_val, ZigType * return nullptr; } +static const char *get_compiler_rt_type_abbrev(ZigType *type) { + uint16_t bits; + if (type->id == ZigTypeIdFloat) { + bits = type->data.floating.bit_count; + } else if (type->id == ZigTypeIdInt) { + bits = type->data.integral.bit_count; + } else { + zig_unreachable(); + } + switch (bits) { + case 16: + return "h"; + case 32: + return "s"; + case 64: + return "d"; + case 80: + return "x"; + case 128: + return "t"; + default: + zig_unreachable(); + } +} -static LLVMValueRef gen_soft_f80_widen_or_shorten(CodeGen *g, ZigType *actual_type, +static const char *libc_float_prefix(CodeGen *g, ZigType *float_type) { + switch (float_type->data.floating.bit_count) { + case 16: + case 80: + return "__"; + case 32: + case 64: + case 128: + return ""; + default: + zig_unreachable(); + } +} + +static const char *libc_float_suffix(CodeGen *g, ZigType *float_type) { + switch (float_type->size_in_bits) { + case 16: return "h"; // Non-standard + case 32: return "f"; + case 64: return ""; + case 80: return "x"; // Non-standard + case 128: return "q"; // Non-standard + default: zig_unreachable(); + } +} + +static LLVMValueRef gen_soft_float_widen_or_shorten(CodeGen *g, ZigType *actual_type, ZigType *wanted_type, LLVMValueRef expr_val) { ZigType *scalar_actual_type = (actual_type->id == ZigTypeIdVector) ? 
@@ -1615,87 +1664,47 @@ static LLVMValueRef gen_soft_f80_widen_or_shorten(CodeGen *g, ZigType *actual_ty uint64_t actual_bits = scalar_actual_type->data.floating.bit_count; uint64_t wanted_bits = scalar_wanted_type->data.floating.bit_count; - - LLVMTypeRef param_type; - LLVMTypeRef return_type; - const char *func_name; + if (actual_bits == wanted_bits) + return expr_val; LLVMValueRef result; bool castTruncatedToF16 = false; - if (actual_bits == wanted_bits) { - return expr_val; - } else if (actual_bits == 80) { - param_type = g->builtin_types.entry_f80->llvm_type; - switch (wanted_bits) { - case 16: - // Only Arm has a native f16 type, other platforms soft-implement it - // using u16 instead. - if (target_is_arm(g->zig_target)) { - return_type = g->builtin_types.entry_f16->llvm_type; - } else { - return_type = g->builtin_types.entry_u16->llvm_type; - castTruncatedToF16 = true; - } - func_name = "__truncxfhf2"; - break; - case 32: - return_type = g->builtin_types.entry_f32->llvm_type; - func_name = "__truncxfsf2"; - break; - case 64: - return_type = g->builtin_types.entry_f64->llvm_type; - func_name = "__truncxfdf2"; - break; - case 128: - return_type = g->builtin_types.entry_f128->llvm_type; - func_name = "__extendxftf2"; - break; - default: - zig_unreachable(); + char fn_name[64]; + if (wanted_bits < actual_bits) { + sprintf(fn_name, "__trunc%sf%sf2", + get_compiler_rt_type_abbrev(scalar_actual_type), + get_compiler_rt_type_abbrev(scalar_wanted_type)); + } else { + sprintf(fn_name, "__extend%sf%sf2", + get_compiler_rt_type_abbrev(scalar_actual_type), + get_compiler_rt_type_abbrev(scalar_wanted_type)); + } + + LLVMTypeRef return_type = scalar_wanted_type->llvm_type; + LLVMTypeRef param_type = scalar_actual_type->llvm_type; + + if (!target_is_arm(g->zig_target)) { + // Only Arm has a native f16 type, other platforms soft-implement it using u16 instead. + if (scalar_wanted_type == g->builtin_types.entry_f16) { + return_type = g->builtin_types.entry_u16->llvm_type; + castTruncatedToF16 = true; } - } else if (wanted_bits == 80) { - return_type = g->builtin_types.entry_f80->llvm_type; - switch (actual_bits) { - case 16: - // Only Arm has a native f16 type, other platforms soft-implement it - // using u16 instead. 
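The previously hardcoded f80 conversion symbols fall out of this generalized scheme as special cases; a quick check of the pattern (hypothetical helper, not part of the change):

    const std = @import("std");

    fn convName(buf: []u8, actual: []const u8, wanted: []const u8, widen: bool) ![]u8 {
        const kind: []const u8 = if (widen) "extend" else "trunc";
        return std.fmt.bufPrint(buf, "__{s}{s}f{s}f2", .{ kind, actual, wanted });
    }

    test "conversion symbol names" {
        var buf: [64]u8 = undefined;
        // f80 -> f64 shrinks: "__truncxfdf2"; f32 -> f80 widens: "__extendsfxf2".
        try std.testing.expectEqualStrings("__truncxfdf2", try convName(&buf, "x", "d", false));
        try std.testing.expectEqualStrings("__extendsfxf2", try convName(&buf, "s", "x", true));
    }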
-                if (target_is_arm(g->zig_target)) {
-                    param_type = g->builtin_types.entry_f16->llvm_type;
-                } else {
-                    param_type = g->builtin_types.entry_u16->llvm_type;
-                    expr_val = LLVMBuildBitCast(g->builder, expr_val, param_type, "");
-                }
-                func_name = "__extendhfxf2";
-                break;
-            case 32:
-                param_type = g->builtin_types.entry_f32->llvm_type;
-                func_name = "__extendsfxf2";
-                break;
-            case 64:
-                param_type = g->builtin_types.entry_f64->llvm_type;
-                func_name = "__extenddfxf2";
-                break;
-            case 128:
-                param_type = g->builtin_types.entry_f128->llvm_type;
-                func_name = "__trunctfxf2";
-                break;
-            default:
-                zig_unreachable();
+        if (scalar_actual_type == g->builtin_types.entry_f16) {
+            param_type = g->builtin_types.entry_u16->llvm_type;
+            expr_val = LLVMBuildBitCast(g->builder, expr_val, param_type, "");
         }
-    } else {
-        zig_unreachable();
     }
 
-    LLVMValueRef func_ref = LLVMGetNamedFunction(g->module, func_name);
+    LLVMValueRef func_ref = LLVMGetNamedFunction(g->module, fn_name);
     if (func_ref == nullptr) {
         LLVMTypeRef fn_type = LLVMFunctionType(return_type, &param_type, 1, false);
-        func_ref = LLVMAddFunction(g->module, func_name, fn_type);
+        func_ref = LLVMAddFunction(g->module, fn_name, fn_type);
     }
 
     result = LLVMBuildCall(g->builder, func_ref, &expr_val, 1, "");
 
-    // On non-Arm platforms we need to bitcast __truncxfhf2 result back to f16
+    // On non-Arm platforms we need to bitcast __trunc<>fhf2 result back to f16
     if (castTruncatedToF16) {
         result = LLVMBuildBitCast(g->builder, result, g->builtin_types.entry_f16->llvm_type, "");
     }
@@ -1721,7 +1730,7 @@ static LLVMValueRef gen_widen_or_shorten(CodeGen *g, bool want_runtime_safety, Z
             || scalar_wanted_type == g->builtin_types.entry_f80)
         && !target_has_f80(g->zig_target))
     {
-        return gen_soft_f80_widen_or_shorten(g, actual_type, wanted_type, expr_val);
+        return gen_soft_float_widen_or_shorten(g, actual_type, wanted_type, expr_val);
     }
     actual_bits = scalar_actual_type->data.floating.bit_count;
     wanted_bits = scalar_wanted_type->data.floating.bit_count;
@@ -2978,10 +2987,54 @@ static LLVMValueRef gen_overflow_shr_op(CodeGen *g, ZigType *operand_type,
     return result;
 }
 
-static LLVMValueRef gen_float_op(CodeGen *g, LLVMValueRef val, ZigType *type_entry, BuiltinFnId op) {
-    assert(type_entry->id == ZigTypeIdFloat || type_entry->id == ZigTypeIdVector);
-    LLVMValueRef floor_fn = get_float_fn(g, type_entry, ZigLLVMFnIdFloatOp, op);
-    return LLVMBuildCall(g->builder, floor_fn, &val, 1, "");
+static LLVMValueRef get_soft_float_fn(CodeGen *g, const char *name, int param_count, LLVMTypeRef param_type, LLVMTypeRef return_type) {
+    LLVMValueRef existing_llvm_fn = LLVMGetNamedFunction(g->module, name);
+    if (existing_llvm_fn != nullptr) return existing_llvm_fn;
+    LLVMValueRef existing_llvm_alias = LLVMGetNamedGlobalAlias(g->module, name, strlen(name));
+    if (existing_llvm_alias != nullptr) return LLVMAliasGetAliasee(existing_llvm_alias);
+
+    LLVMTypeRef param_types[3] = { param_type, param_type, param_type };
+    LLVMTypeRef fn_type = LLVMFunctionType(return_type, param_types, param_count, false);
+    return LLVMAddFunction(g->module, name, fn_type);
+}
+
+static LLVMValueRef gen_soft_float_un_op(CodeGen *g, LLVMValueRef op, ZigType *operand_type, BuiltinFnId op_id) {
+    uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0;
+    ZigType *scalar_type = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.elem_type : operand_type;
+
+    char fn_name[64];
+    sprintf(fn_name, "%s%s%s", libc_float_prefix(g, scalar_type),
+            float_un_op_to_name(op_id), libc_float_suffix(g, scalar_type));
+    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, 1, scalar_type->llvm_type, scalar_type->llvm_type);
+
+    LLVMValueRef result;
+    if (vector_len == 0) {
+        return LLVMBuildCall(g->builder, func_ref, &op, 1, "");
+    } else {
+        result = build_alloca(g, operand_type, "", 0);
+        LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
+        for (uint32_t i = 0; i < vector_len; i++) {
+            LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
+            LLVMValueRef param = LLVMBuildExtractElement(g->builder, op, index_value, "");
+            LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, &param, 1, "");
+            LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
+                    call_result, index_value, "");
+        }
+        return LLVMBuildLoad(g->builder, result, "");
+    }
+}
+
+static LLVMValueRef gen_float_un_op(CodeGen *g, LLVMValueRef operand, ZigType *operand_type, BuiltinFnId op) {
+    assert(operand_type->id == ZigTypeIdFloat || operand_type->id == ZigTypeIdVector);
+    ZigType *elem_type = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.elem_type : operand_type;
+    if ((elem_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
+        (elem_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
+        op == BuiltinFnIdTan)
+    {
+        return gen_soft_float_un_op(g, operand, operand_type, op);
+    }
+    LLVMValueRef float_op_fn = get_float_fn(g, operand_type, ZigLLVMFnIdFloatOp, op);
+    return LLVMBuildCall(g->builder, float_op_fn, &operand, 1, "");
 }
 
 enum DivKind {
@@ -3088,7 +3141,7 @@
         case DivKindExact:
             if (want_runtime_safety) {
                 // Safety check: a / b == floor(a / b)
-                LLVMValueRef floored = gen_float_op(g, result, operand_type, BuiltinFnIdFloor);
+                LLVMValueRef floored = gen_float_un_op(g, result, operand_type, BuiltinFnIdFloor);
 
                 LLVMBasicBlockRef ok_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactOk");
                 LLVMBasicBlockRef fail_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactFail");
@@ -3105,9 +3158,9 @@
             }
             return result;
         case DivKindTrunc:
-            return gen_float_op(g, result, operand_type, BuiltinFnIdTrunc);
+            return gen_float_un_op(g, result, operand_type, BuiltinFnIdTrunc);
         case DivKindFloor:
-            return gen_float_op(g, result, operand_type, BuiltinFnIdFloor);
+            return gen_float_un_op(g, result, operand_type, BuiltinFnIdFloor);
     }
     zig_unreachable();
 }
@@ -3269,17 +3322,7 @@
     }
 }
 
-static LLVMValueRef get_soft_f80_bin_op_func(CodeGen *g, const char *name, int param_count, LLVMTypeRef return_type) {
-    LLVMValueRef existing_llvm_fn = LLVMGetNamedFunction(g->module, name);
-    if (existing_llvm_fn != nullptr) return existing_llvm_fn;
-
-    LLVMTypeRef float_type_ref = g->builtin_types.entry_f80->llvm_type;
-    LLVMTypeRef param_types[2] = { float_type_ref, float_type_ref };
-    LLVMTypeRef fn_type = LLVMFunctionType(return_type, param_types, param_count, false);
-    return LLVMAddFunction(g->module, name, fn_type);
-}
-
-enum SoftF80Icmp {
+enum Icmp {
     NONE,
     EQ_ZERO,
     NE_ZERO,
     LE_ZERO,
     GE_ZERO,
     EQ_NEG,
     EQ_ONE,
 };
 
-static LLVMValueRef add_f80_icmp(CodeGen *g, LLVMValueRef val, SoftF80Icmp kind) {
+static
LLVMValueRef add_icmp(CodeGen *g, LLVMValueRef val, Icmp kind) { switch (kind) { case NONE: return val; @@ -3322,22 +3365,124 @@ static LLVMValueRef add_f80_icmp(CodeGen *g, LLVMValueRef val, SoftF80Icmp kind) } } -static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable, - Stage1AirInstBinOp *bin_op_instruction) -{ - IrBinOp op_id = bin_op_instruction->op_id; - Stage1AirInst *op1 = bin_op_instruction->op1; - Stage1AirInst *op2 = bin_op_instruction->op2; - uint32_t vector_len = op1->value->type->id == ZigTypeIdVector ? op1->value->type->data.vector.len : 0; +static LLVMValueRef gen_soft_int_to_float_op(CodeGen *g, LLVMValueRef value_ref, ZigType *operand_type, ZigType *result_type) { + uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0; - LLVMValueRef op1_value = ir_llvm_value(g, op1); - LLVMValueRef op2_value = ir_llvm_value(g, op2); + // Handle integers of non-pot bitsize by widening them. + const size_t bitsize = operand_type->data.integral.bit_count; + const bool is_signed = operand_type->data.integral.is_signed; + if (bitsize < 32 || !is_power_of_2(bitsize)) { + const size_t wider_bitsize = bitsize < 32 ? 32 : round_to_next_power_of_2(bitsize); + ZigType *const wider_type = get_int_type(g, is_signed, wider_bitsize); + value_ref = gen_widen_or_shorten(g, false, operand_type, wider_type, value_ref); + operand_type = wider_type; + } + assert(bitsize <= 128); - bool div_exact_safety_check = false; - LLVMTypeRef return_type = g->builtin_types.entry_f80->llvm_type; + const char *int_compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(operand_type); + const char *float_compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(result_type); + + char fn_name[64]; + if (is_signed) { + sprintf(fn_name, "__float%si%sf", int_compiler_rt_type_abbrev, float_compiler_rt_type_abbrev); + } else { + sprintf(fn_name, "__floatun%si%sf", int_compiler_rt_type_abbrev, float_compiler_rt_type_abbrev); + } + + int param_count = 1; + LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, param_count, operand_type->llvm_type, result_type->llvm_type); + + LLVMValueRef result; + if (vector_len == 0) { + LLVMValueRef params[1] = {value_ref}; + result = LLVMBuildCall(g->builder, func_ref, params, param_count, ""); + } else { + ZigType *alloca_ty = operand_type; + result = build_alloca(g, alloca_ty, "", 0); + + LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type; + for (uint32_t i = 0; i < vector_len; i++) { + LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false); + LLVMValueRef params[1] = { + LLVMBuildExtractElement(g->builder, value_ref, index_value, ""), + }; + LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, params, param_count, ""); + LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""), + call_result, index_value, ""); + } + + result = LLVMBuildLoad(g->builder, result, ""); + } + return result; +} + +static LLVMValueRef gen_soft_float_to_int_op(CodeGen *g, LLVMValueRef value_ref, ZigType *operand_type, ZigType *result_type) { + uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0; + + // Handle integers of non-pot bitsize by truncating a sufficiently wide pot integer + const size_t bitsize = result_type->data.integral.bit_count; + const bool is_signed = result_type->data.integral.is_signed; + ZigType * wider_type = result_type; + if (bitsize < 32 || !is_power_of_2(bitsize)) { + const size_t wider_bitsize = bitsize < 32 ? 
            32 : round_to_next_power_of_2(bitsize);
+        wider_type = get_int_type(g, is_signed, wider_bitsize);
+    }
+    assert(bitsize <= 128);
+
+    const char *float_compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(operand_type);
+    const char *int_compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(wider_type);
+
+    char fn_name[64];
+    if (is_signed) {
+        sprintf(fn_name, "__fix%sf%si", float_compiler_rt_type_abbrev, int_compiler_rt_type_abbrev);
+    } else {
+        sprintf(fn_name, "__fixuns%sf%si", float_compiler_rt_type_abbrev, int_compiler_rt_type_abbrev);
+    }
+
+    int param_count = 1;
+    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, param_count, operand_type->llvm_type, wider_type->llvm_type);
+
+    LLVMValueRef result;
+    if (vector_len == 0) {
+        LLVMValueRef params[1] = {value_ref};
+        result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
+    } else {
+        ZigType *alloca_ty = operand_type;
+        result = build_alloca(g, alloca_ty, "", 0);
+
+        LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
+        for (uint32_t i = 0; i < vector_len; i++) {
+            LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
+            LLVMValueRef params[1] = {
+                LLVMBuildExtractElement(g->builder, value_ref, index_value, ""),
+            };
+            LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
+            LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
+                call_result, index_value, "");
+        }
+
+        result = LLVMBuildLoad(g->builder, result, "");
+    }
+
+    // The conversion above was done at the widened power-of-two size; shorten the
+    // result back for integer types whose bit size is not a power of two.
+    if (result_type != wider_type) {
+        return gen_widen_or_shorten(g, false, wider_type, result_type, result);
+    }
+    return result;
+}
+
+static LLVMValueRef gen_soft_float_bin_op(CodeGen *g, LLVMValueRef op1_value, LLVMValueRef op2_value, ZigType *operand_type, IrBinOp op_id) {
+    uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0;
+
+    LLVMTypeRef return_type = operand_type->llvm_type;
     int param_count = 2;
-    const char *func_name;
-    SoftF80Icmp res_icmp = NONE;
+
+    const char *compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(operand_type);
+    const char *math_float_prefix = libc_float_prefix(g, operand_type);
+    const char *math_float_suffix = libc_float_suffix(g, operand_type);
+
+    char fn_name[64];
+    Icmp res_icmp = NONE;
     switch (op_id) {
         case IrBinOpInvalid:
         case IrBinOpArrayCat:
@@ -3362,152 +3507,129 @@ static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable,
             zig_unreachable();
         case IrBinOpCmpEq:
             return_type = g->builtin_types.entry_i32->llvm_type;
-            func_name = "__eqxf2";
+            sprintf(fn_name, "__eq%sf2", compiler_rt_type_abbrev);
             res_icmp = EQ_ZERO;
             break;
         case IrBinOpCmpNotEq:
             return_type = g->builtin_types.entry_i32->llvm_type;
-            func_name = "__nexf2";
+            sprintf(fn_name, "__ne%sf2", compiler_rt_type_abbrev);
             res_icmp = NE_ZERO;
             break;
         case IrBinOpCmpLessOrEq:
             return_type = g->builtin_types.entry_i32->llvm_type;
-            func_name = "__lexf2";
+            sprintf(fn_name, "__le%sf2", compiler_rt_type_abbrev);
             res_icmp = LE_ZERO;
             break;
         case IrBinOpCmpLessThan:
             return_type = g->builtin_types.entry_i32->llvm_type;
-            func_name = "__lexf2";
+            sprintf(fn_name, "__le%sf2", compiler_rt_type_abbrev);
             res_icmp = EQ_NEG;
             break;
         case IrBinOpCmpGreaterOrEq:
             return_type = g->builtin_types.entry_i32->llvm_type;
-            func_name = "__gexf2";
+            sprintf(fn_name, "__ge%sf2", compiler_rt_type_abbrev);
             res_icmp = GE_ZERO;
             break;
         case IrBinOpCmpGreaterThan:
             return_type = g->builtin_types.entry_i32->llvm_type;
-            func_name = "__gexf2";
+            sprintf(fn_name, "__ge%sf2", compiler_rt_type_abbrev);
             res_icmp = EQ_ONE;
             break;
         case IrBinOpMaximum:
-            func_name = "__fmaxx";
+            sprintf(fn_name, "%sfmax%s", math_float_prefix, math_float_suffix);
             break;
         case IrBinOpMinimum:
-            func_name = "__fminx";
+            sprintf(fn_name, "%sfmin%s", math_float_prefix, math_float_suffix);
             break;
         case IrBinOpMult:
-            func_name = "__mulxf3";
+            sprintf(fn_name, "__mul%sf3", compiler_rt_type_abbrev);
             break;
         case IrBinOpAdd:
-            func_name = "__addxf3";
+            sprintf(fn_name, "__add%sf3", compiler_rt_type_abbrev);
             break;
         case IrBinOpSub:
-            func_name = "__subxf3";
+            sprintf(fn_name, "__sub%sf3", compiler_rt_type_abbrev);
             break;
         case IrBinOpDivUnspecified:
-            func_name = "__divxf3";
-            break;
         case IrBinOpDivExact:
-            func_name = "__divxf3";
-            div_exact_safety_check = bin_op_instruction->safety_check_on &&
-                ir_want_runtime_safety(g, &bin_op_instruction->base);
-            break;
         case IrBinOpDivTrunc:
-            param_count = 1;
-            func_name = "__truncx";
-            break;
         case IrBinOpDivFloor:
-            param_count = 1;
-            func_name = "__floorx";
+            sprintf(fn_name, "__div%sf3", compiler_rt_type_abbrev);
             break;
         case IrBinOpRemRem:
-            param_count = 1;
-            func_name = "__remx";
-            break;
         case IrBinOpRemMod:
-            param_count = 1;
-            func_name = "__modx";
+            sprintf(fn_name, "%sfmod%s", math_float_prefix, math_float_suffix);
             break;
         default:
             zig_unreachable();
     }
 
-    LLVMValueRef func_ref = get_soft_f80_bin_op_func(g, func_name, param_count, return_type);
+    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, param_count, operand_type->llvm_type, return_type);
 
     LLVMValueRef result;
     if (vector_len == 0) {
         LLVMValueRef params[2] = {op1_value, op2_value};
         result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
-        result = add_f80_icmp(g, result, res_icmp);
+        result = add_icmp(g, result, res_icmp);
     } else {
-        ZigType *alloca_ty = op1->value->type;
+        ZigType *alloca_ty = operand_type;
         if (res_icmp != NONE) alloca_ty = get_vector_type(g, vector_len, g->builtin_types.entry_bool);
         result = build_alloca(g, alloca_ty, "", 0);
-    }
-
-    LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
-    for (uint32_t i = 0; i < vector_len; i++) {
-        LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
-        LLVMValueRef params[2] = {
-            LLVMBuildExtractElement(g->builder, op1_value, index_value, ""),
-            LLVMBuildExtractElement(g->builder, op2_value, index_value, ""),
-        };
-        LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
-        call_result = add_f80_icmp(g, call_result, res_icmp);
-        LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
-            call_result, index_value, "");
-    }
-
-    if (div_exact_safety_check) {
-        // Safety check: a / b == floor(a / b)
-        LLVMValueRef floor_func = get_soft_f80_bin_op_func(g, "__floorx", 1, return_type);
-        LLVMValueRef eq_func = get_soft_f80_bin_op_func(g, "__eqxf2", 2, g->builtin_types.entry_i32->llvm_type);
-
-        LLVMValueRef ok_bit;
-        if (vector_len == 0) {
-            LLVMValueRef floored = LLVMBuildCall(g->builder, floor_func, &result, 1, "");
-
-            LLVMValueRef params[2] = {result, floored};
-            ok_bit = LLVMBuildCall(g->builder, eq_func, params, 2, "");
-        } else {
-            ZigType *bool_vec_ty = get_vector_type(g, vector_len, g->builtin_types.entry_bool);
-            ok_bit = build_alloca(g, bool_vec_ty, "", 0);
-        }
 
+        LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
         for (uint32_t i = 0; i < vector_len; i++) {
             LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
-            LLVMValueRef div_res = LLVMBuildExtractElement(g->builder,
-                LLVMBuildLoad(g->builder, result, ""), index_value, "");
-            LLVMValueRef params[2] = {
-                div_res,
-                LLVMBuildCall(g->builder, floor_func, &div_res, 1, ""),
+                LLVMBuildExtractElement(g->builder, op1_value, index_value, ""),
+                LLVMBuildExtractElement(g->builder, op2_value, index_value, ""),
             };
-            LLVMValueRef cmp_res = LLVMBuildCall(g->builder, eq_func, params, 2, "");
-            cmp_res = LLVMBuildTrunc(g->builder, cmp_res, g->builtin_types.entry_bool->llvm_type, "");
-            LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, ok_bit, ""),
-                cmp_res, index_value, "");
+            LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
+            call_result = add_icmp(g, call_result, res_icmp);
+            LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
+                call_result, index_value, "");
         }
 
-        if (vector_len != 0) {
-            ok_bit = ZigLLVMBuildAndReduce(g->builder, LLVMBuildLoad(g->builder, ok_bit, ""));
-        }
-        LLVMBasicBlockRef ok_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactOk");
-        LLVMBasicBlockRef fail_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactFail");
+        result = LLVMBuildLoad(g->builder, result, "");
+    }
 
-        LLVMBuildCondBr(g->builder, ok_bit, ok_block, fail_block);
+    // Some operations are lowered as compound sequences: the call above only produces
+    // an intermediate value that further operations must refine into the final result.
+    // For example, @mod is derived from @rem by adding the divisor back and taking the
+    // remainder again when the dividend is negative.
+    switch (op_id) {
+        case IrBinOpDivTrunc:
+            return gen_float_un_op(g, result, operand_type, BuiltinFnIdTrunc);
+        case IrBinOpDivFloor:
+            return gen_float_un_op(g, result, operand_type, BuiltinFnIdFloor);
+        case IrBinOpRemMod:
+        {
+            LLVMValueRef b = gen_soft_float_bin_op(g, result, op2_value, operand_type, IrBinOpAdd);
+            LLVMValueRef wrapped_result = gen_soft_float_bin_op(g, b, op2_value, operand_type, IrBinOpRemRem);
+            LLVMValueRef zero = LLVMConstNull(operand_type->llvm_type);
+            LLVMValueRef ltz = gen_soft_float_bin_op(g, op1_value, zero, operand_type, IrBinOpCmpLessThan);
 
-        LLVMPositionBuilderAtEnd(g->builder, fail_block);
-        gen_safety_crash(g, PanicMsgIdExactDivisionRemainder);
+            return LLVMBuildSelect(g->builder, ltz, wrapped_result, result, "");
+        }
+        case IrBinOpDivExact:
+        {
+            LLVMValueRef floored = gen_float_un_op(g, result, operand_type, BuiltinFnIdFloor);
+            LLVMValueRef ok_bit = gen_soft_float_bin_op(g, result, floored, operand_type, IrBinOpCmpEq);
+            if (vector_len != 0) {
+                ok_bit = ZigLLVMBuildAndReduce(g->builder, ok_bit);
+            }
 
-        LLVMPositionBuilderAtEnd(g->builder, ok_block);
-    }
+            LLVMBasicBlockRef ok_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactOk");
+            LLVMBasicBlockRef fail_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactFail");
+            LLVMBuildCondBr(g->builder, ok_bit, ok_block, fail_block);
 
-    if (vector_len != 0) {
-        result = LLVMBuildLoad(g->builder, result, "");
+            LLVMPositionBuilderAtEnd(g->builder, fail_block);
+            gen_safety_crash(g, PanicMsgIdExactDivisionRemainder);
+
+            LLVMPositionBuilderAtEnd(g->builder, ok_block);
+        }
+            return result;
+        default:
+            return result;
     }
-    return result;
+    zig_unreachable();
 }
 
 static LLVMValueRef ir_render_bin_op(CodeGen *g, Stage1Air *executable,
@@ -3519,8 +3641,13 @@ static LLVMValueRef ir_render_bin_op(CodeGen *g, Stage1Air *executable,
     ZigType *operand_type = op1->value->type;
     ZigType *scalar_type = (operand_type->id == ZigTypeIdVector) ?
         operand_type->data.vector.elem_type : operand_type;
-    if (scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) {
-        return ir_render_soft_f80_bin_op(g, executable, bin_op_instruction);
+    if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
+        (scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
+        // LLVM lowers the soft float calls for f128 as if they operated on `long double`;
+        // on targets where `long double` is not f128 that selects the wrong routines, so
+        // we lower the calls ourselves.
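+        // (For instance, on a target whose `long double` is the 80-bit extended type, an
+        // f128 division must still be emitted as an explicit `__divtf3` call; the concrete
+        // target here is illustrative, only the type mismatch matters.)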
+        LLVMValueRef op1_value = ir_llvm_value(g, op1);
+        LLVMValueRef op2_value = ir_llvm_value(g, op2);
+        return gen_soft_float_bin_op(g, op1_value, op2_value, operand_type, op_id);
     }
@@ -3828,10 +3955,17 @@ static LLVMValueRef ir_render_cast(CodeGen *g, Stage1Air *executable,
         }
         case CastOpIntToFloat:
             assert(actual_type->id == ZigTypeIdInt);
-            if (actual_type->data.integral.is_signed) {
-                return LLVMBuildSIToFP(g->builder, expr_val, get_llvm_type(g, wanted_type), "");
-            } else {
-                return LLVMBuildUIToFP(g->builder, expr_val, get_llvm_type(g, wanted_type), "");
+            {
+                if ((wanted_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
+                    (wanted_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
+                    return gen_soft_int_to_float_op(g, expr_val, actual_type, wanted_type);
+                } else {
+                    if (actual_type->data.integral.is_signed) {
+                        return LLVMBuildSIToFP(g->builder, expr_val, get_llvm_type(g, wanted_type), "");
+                    } else {
+                        return LLVMBuildUIToFP(g->builder, expr_val, get_llvm_type(g, wanted_type), "");
+                    }
+                }
             }
         case CastOpFloatToInt: {
             assert(wanted_type->id == ZigTypeIdInt);
@@ -3840,18 +3974,28 @@ static LLVMValueRef ir_render_cast(CodeGen *g, Stage1Air *executable,
             bool want_safety = ir_want_runtime_safety(g, &cast_instruction->base);
 
             LLVMValueRef result;
-            if (wanted_type->data.integral.is_signed) {
-                result = LLVMBuildFPToSI(g->builder, expr_val, get_llvm_type(g, wanted_type), "");
+            if ((actual_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
+                (actual_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
+                result = gen_soft_float_to_int_op(g, expr_val, actual_type, wanted_type);
             } else {
-                result = LLVMBuildFPToUI(g->builder, expr_val, get_llvm_type(g, wanted_type), "");
+                if (wanted_type->data.integral.is_signed) {
+                    result = LLVMBuildFPToSI(g->builder, expr_val, get_llvm_type(g, wanted_type), "");
+                } else {
+                    result = LLVMBuildFPToUI(g->builder, expr_val, get_llvm_type(g, wanted_type), "");
+                }
             }
 
             if (want_safety) {
                 LLVMValueRef back_to_float;
-                if (wanted_type->data.integral.is_signed) {
-                    back_to_float = LLVMBuildSIToFP(g->builder, result, LLVMTypeOf(expr_val), "");
+                if ((actual_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
+                    (actual_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
+                    back_to_float = gen_soft_int_to_float_op(g, result, wanted_type, actual_type);
                 } else {
-                    back_to_float = LLVMBuildUIToFP(g->builder, result, LLVMTypeOf(expr_val), "");
+                    if (wanted_type->data.integral.is_signed) {
+                        back_to_float = LLVMBuildSIToFP(g->builder, result, LLVMTypeOf(expr_val), "");
+                    } else {
+                        back_to_float = LLVMBuildUIToFP(g->builder, result, LLVMTypeOf(expr_val), "");
+                    }
                 }
                 LLVMValueRef difference = LLVMBuildFSub(g->builder, expr_val, back_to_float, "");
                 LLVMValueRef one_pos = LLVMConstReal(LLVMTypeOf(expr_val), 1.0f);
@@ -4151,42 +4295,46 @@ static LLVMValueRef ir_render_binary_not(CodeGen *g, Stage1Air *executable,
     return LLVMBuildNot(g->builder, operand, "");
 }
 
-static LLVMValueRef ir_gen_soft_f80_neg(CodeGen *g, ZigType *op_type, LLVMValueRef operand) {
-    uint32_t vector_len = op_type->id == ZigTypeIdVector ? op_type->data.vector.len : 0;
+static LLVMValueRef gen_soft_float_neg(CodeGen *g, ZigType *operand_type, LLVMValueRef operand) {
+    uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0;
+    uint16_t num_bits = operand_type->data.floating.bit_count;
 
-    LLVMTypeRef llvm_i80 = LLVMIntType(80);
-    LLVMValueRef sign_mask = LLVMConstInt(llvm_i80, 1, false);
-    sign_mask = LLVMConstShl(sign_mask, LLVMConstInt(llvm_i80, 79, false));
+    ZigType *iX_type = get_int_type(g, true, num_bits);
+    LLVMValueRef sign_mask = LLVMConstInt(iX_type->llvm_type, 1, false);
+    sign_mask = LLVMConstShl(sign_mask, LLVMConstInt(iX_type->llvm_type, num_bits - 1, false));
 
-    LLVMValueRef result;
     if (vector_len == 0) {
-        result = LLVMBuildXor(g->builder, operand, sign_mask, "");
+        LLVMValueRef bitcasted_operand = LLVMBuildBitCast(g->builder, operand, iX_type->llvm_type, "");
+        LLVMValueRef result = LLVMBuildXor(g->builder, bitcasted_operand, sign_mask, "");
+
+        return LLVMBuildBitCast(g->builder, result, operand_type->llvm_type, "");
     } else {
-        result = build_alloca(g, op_type, "", 0);
-    }
+        LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
+        ZigType *iX_vector_type = get_vector_type(g, vector_len, iX_type);
 
-    LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
-    for (uint32_t i = 0; i < vector_len; i++) {
-        LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
-        LLVMValueRef xor_operand = LLVMBuildExtractElement(g->builder, operand, index_value, "");
-        LLVMValueRef xor_result = LLVMBuildXor(g->builder, xor_operand, sign_mask, "");
-        LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
-            xor_result, index_value, "");
-    }
-    if (vector_len != 0) {
-        result = LLVMBuildLoad(g->builder, result, "");
+        LLVMValueRef result = build_alloca(g, iX_vector_type, "", 0);
+        LLVMValueRef bitcasted_operand = LLVMBuildBitCast(g->builder, operand, iX_vector_type->llvm_type, "");
+        for (uint32_t i = 0; i < vector_len; i++) {
+            LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
+            LLVMValueRef elem = LLVMBuildExtractElement(g->builder, bitcasted_operand, index_value, "");
+            LLVMValueRef result_elem = LLVMBuildXor(g->builder, elem, sign_mask, "");
+            LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
+                result_elem, index_value, "");
+        }
+        return LLVMBuildBitCast(g->builder, LLVMBuildLoad(g->builder, result, ""), operand_type->llvm_type, "");
     }
-    return result;
 }
 
-static LLVMValueRef ir_gen_negation(CodeGen *g, Stage1AirInst *inst, Stage1AirInst *operand, bool wrapping) {
+static LLVMValueRef gen_negation(CodeGen *g, Stage1AirInst *inst, Stage1AirInst *operand, bool wrapping) {
     LLVMValueRef llvm_operand = ir_llvm_value(g, operand);
     ZigType *operand_type = operand->value->type;
     ZigType *scalar_type = (operand_type->id == ZigTypeIdVector) ?
         operand_type->data.vector.elem_type : operand_type;
 
-    if (scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target))
-        return ir_gen_soft_f80_neg(g, operand_type, llvm_operand);
+    if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
+        (scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
+        return gen_soft_float_neg(g, operand_type, llvm_operand);
+    }
 
     if (scalar_type->id == ZigTypeIdFloat) {
         ZigLLVMSetFastMath(g->builder, ir_want_fast_math(g, inst));
@@ -4210,7 +4358,7 @@ static LLVMValueRef ir_gen_negation(CodeGen *g, Stage1AirInst *inst, Stage1AirIn
 
 static LLVMValueRef ir_render_negation(CodeGen *g, Stage1Air *executable,
         Stage1AirInstNegation *inst) {
-    return ir_gen_negation(g, &inst->base, inst->operand, inst->wrapping);
+    return gen_negation(g, &inst->base, inst->operand, inst->wrapping);
 }
 
 static LLVMValueRef ir_render_bool_not(CodeGen *g, Stage1Air *executable, Stage1AirInstBoolNot *instruction) {
@@ -7024,110 +7172,34 @@ static LLVMValueRef ir_render_atomic_store(CodeGen *g, Stage1Air *executable,
     return nullptr;
 }
 
-static LLVMValueRef ir_render_soft_f80_float_op(CodeGen *g, Stage1Air *executable, Stage1AirInstFloatOp *instruction) {
-    ZigType *op_type = instruction->operand->value->type;
-    uint32_t vector_len = op_type->id == ZigTypeIdVector ? op_type->data.vector.len : 0;
-
-    const char *func_name;
-    switch (instruction->fn_id) {
-        case BuiltinFnIdSqrt:
-            func_name = "__sqrtx";
-            break;
-        case BuiltinFnIdSin:
-            func_name = "__sinx";
-            break;
-        case BuiltinFnIdCos:
-            func_name = "__cosx";
-            break;
-        case BuiltinFnIdExp:
-            func_name = "__expx";
-            break;
-        case BuiltinFnIdExp2:
-            func_name = "__exp2x";
-            break;
-        case BuiltinFnIdLog:
-            func_name = "__logx";
-            break;
-        case BuiltinFnIdLog2:
-            func_name = "__log2x";
-            break;
-        case BuiltinFnIdLog10:
-            func_name = "__log10x";
-            break;
-        case BuiltinFnIdFabs:
-            func_name = "__fabsx";
-            break;
-        case BuiltinFnIdFloor:
-            func_name = "__floorx";
-            break;
-        case BuiltinFnIdCeil:
-            func_name = "__ceilx";
-            break;
-        case BuiltinFnIdTrunc:
-            func_name = "__truncx";
-            break;
-        case BuiltinFnIdNearbyInt:
-            func_name = "__nearbyintx";
-            break;
-        case BuiltinFnIdRound:
-            func_name = "__roundx";
-            break;
-        default:
-            zig_unreachable();
-    }
-
-
-    LLVMValueRef func_ref = LLVMGetNamedFunction(g->module, func_name);
-    if (func_ref == nullptr) {
-        LLVMTypeRef f80_ref = g->builtin_types.entry_f80->llvm_type;
-        LLVMTypeRef fn_type = LLVMFunctionType(f80_ref, &f80_ref, 1, false);
-        func_ref = LLVMAddFunction(g->module, func_name, fn_type);
-    }
-
-    LLVMValueRef operand = ir_llvm_value(g, instruction->operand);
-    LLVMValueRef result;
-    if (vector_len == 0) {
-        result = LLVMBuildCall(g->builder, func_ref, &operand, 1, "");
-    } else {
-        result = build_alloca(g, instruction->operand->value->type, "", 0);
-    }
-
-    LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
-    for (uint32_t i = 0; i < vector_len; i++) {
-        LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
-        LLVMValueRef param = LLVMBuildExtractElement(g->builder, operand, index_value, "");
-        LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, &param, 1, "");
-        LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
-            call_result, index_value, "");
-    }
-    if (vector_len != 0) {
-        result = LLVMBuildLoad(g->builder, result, "");
-    }
-    return result;
-}
-
 static LLVMValueRef ir_render_float_op(CodeGen *g, Stage1Air *executable, Stage1AirInstFloatOp *instruction) {
-    ZigType *op_type = instruction->operand->value->type;
-    op_type = op_type->id == ZigTypeIdVector ? op_type->data.vector.elem_type : op_type;
-    if (op_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) {
-        return ir_render_soft_f80_float_op(g, executable, instruction);
-    }
     LLVMValueRef operand = ir_llvm_value(g, instruction->operand);
-    LLVMValueRef fn_val = get_float_fn(g, instruction->base.value->type, ZigLLVMFnIdFloatOp, instruction->fn_id);
-    return LLVMBuildCall(g->builder, fn_val, &operand, 1, "");
+    ZigType *operand_type = instruction->operand->value->type;
+    return gen_float_un_op(g, operand, operand_type, instruction->fn_id);
 }
 
-static LLVMValueRef ir_render_soft_f80_mul_add(CodeGen *g, Stage1Air *executable, Stage1AirInstMulAdd *instruction) {
-    ZigType *op_type = instruction->op1->value->type;
-    uint32_t vector_len = op_type->id == ZigTypeIdVector ? op_type->data.vector.len : 0;
+static LLVMValueRef ir_render_soft_mul_add(CodeGen *g, Stage1Air *executable, Stage1AirInstMulAdd *instruction, ZigType *float_type) {
+    ZigType *operand_type = instruction->op1->value->type;
+    uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0;
+
+    const char *fn_name;
+    if (float_type == g->builtin_types.entry_f32)
+        fn_name = "fmaf";
+    else if (float_type == g->builtin_types.entry_f64)
+        fn_name = "fma";
+    else if (float_type == g->builtin_types.entry_f80)
+        fn_name = "__fmax";
+    else if (float_type == g->builtin_types.entry_f128)
+        fn_name = "fmaq";
+    else
+        zig_unreachable();
 
-    const char *func_name = "__fmax";
-    LLVMValueRef func_ref = LLVMGetNamedFunction(g->module, func_name);
+    LLVMValueRef func_ref = LLVMGetNamedFunction(g->module, fn_name);
     if (func_ref == nullptr) {
-        LLVMTypeRef f80_ref = g->builtin_types.entry_f80->llvm_type;
-        LLVMTypeRef params[3] = { f80_ref, f80_ref, f80_ref };
-        LLVMTypeRef fn_type = LLVMFunctionType(f80_ref, params, 3, false);
-        func_ref = LLVMAddFunction(g->module, func_name, fn_type);
+        LLVMTypeRef float_type_ref = float_type->llvm_type;
+        LLVMTypeRef params[3] = { float_type_ref, float_type_ref, float_type_ref };
+        LLVMTypeRef fn_type = LLVMFunctionType(float_type_ref, params, 3, false);
+        func_ref = LLVMAddFunction(g->module, fn_name, fn_type);
    }
 
     LLVMValueRef op1 = ir_llvm_value(g, instruction->op1);
@@ -7161,10 +7233,11 @@ static LLVMValueRef ir_render_soft_f80_mul_add(CodeGen *g, Stage1Air *executable
 }
 
 static LLVMValueRef ir_render_mul_add(CodeGen *g, Stage1Air *executable, Stage1AirInstMulAdd *instruction) {
-    ZigType *op_type = instruction->op1->value->type;
-    op_type = op_type->id == ZigTypeIdVector ? op_type->data.vector.elem_type : op_type;
-    if (op_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) {
-        return ir_render_soft_f80_mul_add(g, executable, instruction);
+    ZigType *operand_type = instruction->op1->value->type;
+    operand_type = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.elem_type : operand_type;
+    if ((operand_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
+        (operand_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
+        return ir_render_soft_mul_add(g, executable, instruction, operand_type);
     }
     LLVMValueRef op1 = ir_llvm_value(g, instruction->op1);
     LLVMValueRef op2 = ir_llvm_value(g, instruction->op2);
@@ -9513,10 +9586,13 @@ static void define_builtin_types(CodeGen *g) {
     switch (g->zig_target->arch) {
         case ZigLLVM_x86:
         case ZigLLVM_x86_64:
-            if (g->zig_target->abi != ZigLLVM_MSVC)
+            if (g->zig_target->abi != ZigLLVM_MSVC) {
                 add_fp_entry(g, "c_longdouble", 80, LLVMX86FP80Type(), &g->builtin_types.entry_c_longdouble);
-            else
+                g->builtin_types.entry_c_longdouble->abi_size = g->builtin_types.entry_f80->abi_size;
+                g->builtin_types.entry_c_longdouble->abi_align = g->builtin_types.entry_f80->abi_align;
+            } else {
                 add_fp_entry(g, "c_longdouble", 64, LLVMDoubleType(), &g->builtin_types.entry_c_longdouble);
+            }
             break;
         case ZigLLVM_arm:
         case ZigLLVM_armeb:
@@ -9750,6 +9826,7 @@ static void define_builtin_fns(CodeGen *g) {
     create_builtin_fn(g, BuiltinFnIdSqrt, "sqrt", 1);
     create_builtin_fn(g, BuiltinFnIdSin, "sin", 1);
     create_builtin_fn(g, BuiltinFnIdCos, "cos", 1);
+    create_builtin_fn(g, BuiltinFnIdTan, "tan", 1);
     create_builtin_fn(g, BuiltinFnIdExp, "exp", 1);
     create_builtin_fn(g, BuiltinFnIdExp2, "exp2", 1);
    create_builtin_fn(g, BuiltinFnIdLog, "log", 1);
diff --git a/src/stage1/ir.cpp b/src/stage1/ir.cpp
index b8ae1ea93eb9..f7ab5e12fa69 100644
--- a/src/stage1/ir.cpp
+++ b/src/stage1/ir.cpp
@@ -24132,6 +24132,9 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdCos:
             out_val->data.x_f16 = zig_double_to_f16(cos(zig_f16_to_double(op->data.x_f16)));
             break;
+        case BuiltinFnIdTan:
+            out_val->data.x_f16 = zig_double_to_f16(tan(zig_f16_to_double(op->data.x_f16)));
+            break;
         case BuiltinFnIdExp:
             out_val->data.x_f16 = zig_double_to_f16(exp(zig_f16_to_double(op->data.x_f16)));
             break;
@@ -24181,6 +24184,9 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdCos:
             out_val->data.x_f32 = cosf(op->data.x_f32);
             break;
+        case BuiltinFnIdTan:
+            out_val->data.x_f32 = tanf(op->data.x_f32);
+            break;
         case BuiltinFnIdExp:
             out_val->data.x_f32 = expf(op->data.x_f32);
             break;
@@ -24230,6 +24236,9 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdCos:
             out_val->data.x_f64 = cos(op->data.x_f64);
             break;
+        case BuiltinFnIdTan:
+            out_val->data.x_f64 = tan(op->data.x_f64);
+            break;
         case BuiltinFnIdExp:
             out_val->data.x_f64 = exp(op->data.x_f64);
             break;
@@ -24293,6 +24302,7 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdNearbyInt:
         case BuiltinFnIdSin:
         case BuiltinFnIdCos:
+        case BuiltinFnIdTan:
         case BuiltinFnIdExp:
         case BuiltinFnIdExp2:
         case BuiltinFnIdLog:
@@ -24300,7 +24310,7 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdLog2:
             return ir_add_error_node(ira, source_node,
                 buf_sprintf("compiler bug: TODO: implement '%s' for type '%s'. See https://github.com/ziglang/zig/issues/4026",
-                    float_op_to_name(fop), buf_ptr(&float_type->name)));
+                    float_un_op_to_name(fop), buf_ptr(&float_type->name)));
         default:
             zig_unreachable();
     }
@@ -24327,24 +24337,94 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
             break;
         case BuiltinFnIdCeil:
             f128M_roundToInt(in, softfloat_round_max, false, out);
-        break;
+            break;
         case BuiltinFnIdTrunc:
             f128M_trunc(in, out);
             break;
         case BuiltinFnIdRound:
             f128M_roundToInt(in, softfloat_round_near_maxMag, false, out);
             break;
-        case BuiltinFnIdNearbyInt:
-        case BuiltinFnIdSin:
-        case BuiltinFnIdCos:
-        case BuiltinFnIdExp:
-        case BuiltinFnIdExp2:
-        case BuiltinFnIdLog:
-        case BuiltinFnIdLog10:
-        case BuiltinFnIdLog2:
-            return ir_add_error_node(ira, source_node,
-                buf_sprintf("compiler bug: TODO: implement '%s' for type '%s'. See https://github.com/ziglang/zig/issues/4026",
-                    float_op_to_name(fop), buf_ptr(&float_type->name)));
+        case BuiltinFnIdNearbyInt: {
+            float64_t f64_value = f128M_to_f64(in);
+            double double_value;
+            memcpy(&double_value, &f64_value, sizeof(double));
+            double_value = nearbyint(double_value);
+            memcpy(&f64_value, &double_value, sizeof(double));
+            f64_to_f128M(f64_value, out);
+            break;
+        }
+        case BuiltinFnIdSin: {
+            float64_t f64_value = f128M_to_f64(in);
+            double double_value;
+            memcpy(&double_value, &f64_value, sizeof(double));
+            double_value = sin(double_value);
+            memcpy(&f64_value, &double_value, sizeof(double));
+            f64_to_f128M(f64_value, out);
+            break;
+        }
+        case BuiltinFnIdCos: {
+            float64_t f64_value = f128M_to_f64(in);
+            double double_value;
+            memcpy(&double_value, &f64_value, sizeof(double));
+            double_value = cos(double_value);
+            memcpy(&f64_value, &double_value, sizeof(double));
+            f64_to_f128M(f64_value, out);
+            break;
+        }
+        case BuiltinFnIdTan: {
+            float64_t f64_value = f128M_to_f64(in);
+            double double_value;
+            memcpy(&double_value, &f64_value, sizeof(double));
+            double_value = tan(double_value);
+            memcpy(&f64_value, &double_value, sizeof(double));
+            f64_to_f128M(f64_value, out);
+            break;
+        }
+        case BuiltinFnIdExp: {
+            float64_t f64_value = f128M_to_f64(in);
+            double double_value;
+            memcpy(&double_value, &f64_value, sizeof(double));
+            double_value = exp(double_value);
+            memcpy(&f64_value, &double_value, sizeof(double));
+            f64_to_f128M(f64_value, out);
+            break;
+        }
+        case BuiltinFnIdExp2: {
+            float64_t f64_value = f128M_to_f64(in);
+            double double_value;
+            memcpy(&double_value, &f64_value, sizeof(double));
+            double_value = exp2(double_value);
+            memcpy(&f64_value, &double_value, sizeof(double));
+            f64_to_f128M(f64_value, out);
+            break;
+        }
+        case BuiltinFnIdLog: {
+            float64_t f64_value = f128M_to_f64(in);
+            double double_value;
+            memcpy(&double_value, &f64_value, sizeof(double));
+            double_value = log(double_value);
+            memcpy(&f64_value, &double_value, sizeof(double));
+            f64_to_f128M(f64_value, out);
+            break;
+        }
+        case BuiltinFnIdLog10: {
+            float64_t f64_value = f128M_to_f64(in);
+            double double_value;
+            memcpy(&double_value, &f64_value, sizeof(double));
+            double_value = log10(double_value);
+            memcpy(&f64_value, &double_value, sizeof(double));
+            f64_to_f128M(f64_value, out);
+            break;
+        }
+        case BuiltinFnIdLog2: {
+            float64_t f64_value = f128M_to_f64(in);
+            double double_value;
+            memcpy(&double_value, &f64_value, sizeof(double));
+            double_value = log2(double_value);
+            memcpy(&f64_value, &double_value, sizeof(double));
+            f64_to_f128M(f64_value, out);
+            break;
+        }
         default:
             zig_unreachable();
     }
diff --git a/src/stage1/ir_print.cpp b/src/stage1/ir_print.cpp
index 5c7727da0c08..9296242a3e7b 100644
--- a/src/stage1/ir_print.cpp
+++ b/src/stage1/ir_print.cpp
@@ -2558,13 +2558,13 @@ static void ir_print_add_implicit_return_type(IrPrintSrc *irp, Stage1ZirInstAddI
 }
 
 static void ir_print_float_op(IrPrintSrc *irp, Stage1ZirInstFloatOp *instruction) {
-    fprintf(irp->f, "@%s(", float_op_to_name(instruction->fn_id));
+    fprintf(irp->f, "@%s(", float_un_op_to_name(instruction->fn_id));
     ir_print_other_inst_src(irp, instruction->operand);
     fprintf(irp->f, ")");
 }
 
 static void ir_print_float_op(IrPrintGen *irp, Stage1AirInstFloatOp *instruction) {
-    fprintf(irp->f, "@%s(", float_op_to_name(instruction->fn_id));
+    fprintf(irp->f, "@%s(", float_un_op_to_name(instruction->fn_id));
     ir_print_other_inst_gen(irp, instruction->operand);
     fprintf(irp->f, ")");
 }
diff --git a/src/translate_c.zig b/src/translate_c.zig
index e09ebea4d71c..0139ec8ec3ec 100644
--- a/src/translate_c.zig
+++ b/src/translate_c.zig
@@ -3998,7 +3998,7 @@ fn transFloatingLiteral(c: *Context, scope: *Scope, expr: *const clang.FloatingL
     var dbl = expr.getValueAsApproximateDouble();
     const is_negative = dbl < 0;
     if (is_negative) dbl = -dbl;
-    const str = if (dbl == std.math.floor(dbl))
+    const str = if (dbl == @floor(dbl))
         try std.fmt.allocPrint(c.arena, "{d}.0", .{dbl})
     else
         try std.fmt.allocPrint(c.arena, "{d}", .{dbl});
diff --git a/src/value.zig b/src/value.zig
index bb7b74229051..d2de389de9e1 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -1155,6 +1155,7 @@ pub const Value = extern union {
             16 => return floatWriteToMemory(f16, val.toFloat(f16), target, buffer),
             32 => return floatWriteToMemory(f32, val.toFloat(f32), target, buffer),
             64 => return floatWriteToMemory(f64, val.toFloat(f64), target, buffer),
+            80 => return floatWriteToMemory(f80, val.toFloat(f80), target, buffer),
             128 => return floatWriteToMemory(f128, val.toFloat(f128), target, buffer),
             else => unreachable,
         },
@@ -1379,25 +1380,21 @@ pub const Value = extern union {
     }
 
     fn floatWriteToMemory(comptime F: type, f: F, target: Target, buffer: []u8) void {
+        const endian = target.cpu.arch.endian();
         if (F == f80) {
-            switch (target.cpu.arch) {
-                .i386, .x86_64 => {
-                    const repr = std.math.break_f80(f);
-                    std.mem.writeIntLittle(u64, buffer[0..8], repr.fraction);
-                    std.mem.writeIntLittle(u16, buffer[8..10], repr.exp);
-                    // TODO set the rest of the bytes to undefined. should we use 0xaa
-                    // or is there a different way?
-                    return;
-                },
-                else => {},
-            }
+            const repr = std.math.break_f80(f);
+            std.mem.writeInt(u64, buffer[0..8], repr.fraction, endian);
+            std.mem.writeInt(u16, buffer[8..10], repr.exp, endian);
+            // TODO set the rest of the bytes to undefined. should we use 0xaa
+            // or is there a different way?
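+            // Note: `break_f80` yields a 64-bit significand (`fraction`) and a 16-bit
+            // sign/exponent word (`exp`), so only the first 10 bytes written above carry
+            // information; any remaining bytes of the buffer are padding.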
+            return;
         }
 
         const Int = @Type(.{ .Int = .{
             .signedness = .unsigned,
             .bits = @typeInfo(F).Float.bits,
         } });
         const int = @bitCast(Int, f);
-        std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], int, target.cpu.arch.endian());
+        std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], int, endian);
     }
 
     fn floatReadFromMemory(comptime F: type, target: Target, buffer: []const u8) F {
@@ -2869,9 +2866,7 @@ pub const Value = extern union {
             16 => return Value.Tag.float_16.create(arena, @intToFloat(f16, x)),
             32 => return Value.Tag.float_32.create(arena, @intToFloat(f32, x)),
             64 => return Value.Tag.float_64.create(arena, @intToFloat(f64, x)),
-            // We can't lower this properly on non-x86 llvm backends yet
-            //80 => return Value.Tag.float_80.create(arena, @intToFloat(f80, x)),
-            80 => @panic("TODO f80 intToFloat"),
+            80 => return Value.Tag.float_80.create(arena, @intToFloat(f80, x)),
             128 => return Value.Tag.float_128.create(arena, @intToFloat(f128, x)),
             else => unreachable,
         }
@@ -2908,9 +2903,9 @@ pub const Value = extern union {
         }
 
         const isNegative = std.math.signbit(value);
-        value = std.math.fabs(value);
+        value = @fabs(value);
 
-        const floored = std.math.floor(value);
+        const floored = @floor(value);
 
         var rational = try std.math.big.Rational.init(arena);
         defer rational.deinit();
@@ -2941,7 +2936,7 @@ pub const Value = extern union {
             return 1;
         }
 
-        const w_value = std.math.fabs(scalar);
+        const w_value = @fabs(scalar);
         return @divFloor(@floatToInt(std.math.big.Limb, std.math.log2(w_value)), @typeInfo(std.math.big.Limb).Int.bits) + 1;
     }
@@ -3737,9 +3732,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @rem(lhs_val, rhs_val));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt __remx");
-                }
                 const lhs_val = lhs.toFloat(f80);
                 const rhs_val = rhs.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @rem(lhs_val, rhs_val));
@@ -3782,9 +3774,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @mod(lhs_val, rhs_val));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt __modx");
-                }
                 const lhs_val = lhs.toFloat(f80);
                 const rhs_val = rhs.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @mod(lhs_val, rhs_val));
@@ -4198,9 +4187,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, lhs_val / rhs_val);
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt __divxf3");
-                }
                 const lhs_val = lhs.toFloat(f80);
                 const rhs_val = rhs.toFloat(f80);
                 return Value.Tag.float_80.create(arena, lhs_val / rhs_val);
@@ -4255,9 +4241,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @divFloor(lhs_val, rhs_val));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt __floorx");
-                }
                 const lhs_val = lhs.toFloat(f80);
                 const rhs_val = rhs.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @divFloor(lhs_val, rhs_val));
@@ -4312,9 +4295,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @divTrunc(lhs_val, rhs_val));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt __truncx");
-                }
                 const lhs_val = lhs.toFloat(f80);
                 const rhs_val = rhs.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @divTrunc(lhs_val, rhs_val));
@@ -4369,9 +4349,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, lhs_val * rhs_val);
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt __mulxf3");
-                }
                 const lhs_val = lhs.toFloat(f80);
                 const rhs_val = rhs.toFloat(f80);
                 return Value.Tag.float_80.create(arena, lhs_val * rhs_val);
@@ -4411,16 +4388,10 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @sqrt(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt __sqrtx");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @sqrt(f));
             },
             128 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt sqrtq");
-                }
                 const f = val.toFloat(f128);
                 return Value.Tag.float_128.create(arena, @sqrt(f));
             },
@@ -4454,16 +4425,10 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @sin(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt sin for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @sin(f));
             },
             128 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt sin for f128");
-                }
                 const f = val.toFloat(f128);
                 return Value.Tag.float_128.create(arena, @sin(f));
             },
@@ -4497,16 +4462,10 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @cos(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt cos for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @cos(f));
             },
             128 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt cos for f128");
-                }
                 const f = val.toFloat(f128);
                 return Value.Tag.float_128.create(arena, @cos(f));
             },
@@ -4514,6 +4473,43 @@ pub const Value = extern union {
         }
     }
 
+    pub fn tan(val: Value, float_type: Type, arena: Allocator, target: Target) Allocator.Error!Value {
+        if (float_type.zigTypeTag() == .Vector) {
+            const result_data = try arena.alloc(Value, float_type.vectorLen());
+            for (result_data) |*scalar, i| {
+                scalar.* = try tanScalar(val.indexVectorlike(i), float_type.scalarType(), arena, target);
+            }
+            return Value.Tag.aggregate.create(arena, result_data);
+        }
+        return tanScalar(val, float_type, arena, target);
+    }
+
+    pub fn tanScalar(val: Value, float_type: Type, arena: Allocator, target: Target) Allocator.Error!Value {
+        switch (float_type.floatBits(target)) {
+            16 => {
+                const f = val.toFloat(f16);
+                return Value.Tag.float_16.create(arena, @tan(f));
+            },
+            32 => {
+                const f = val.toFloat(f32);
+                return Value.Tag.float_32.create(arena, @tan(f));
+            },
+            64 => {
+                const f = val.toFloat(f64);
+                return Value.Tag.float_64.create(arena, @tan(f));
+            },
+            80 => {
+                const f = val.toFloat(f80);
+                return Value.Tag.float_80.create(arena, @tan(f));
+            },
+            128 => {
+                const f = val.toFloat(f128);
+                return Value.Tag.float_128.create(arena, @tan(f));
+            },
+            else => unreachable,
+        }
+    }
+
     pub fn exp(val: Value, float_type: Type, arena: Allocator, target: Target) Allocator.Error!Value {
         if (float_type.zigTypeTag() == .Vector) {
             const result_data = try arena.alloc(Value, float_type.vectorLen());
@@ -4540,16 +4536,10 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @exp(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt exp for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @exp(f));
             },
             128 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt exp for f128");
-                }
                 const f = val.toFloat(f128);
                 return Value.Tag.float_128.create(arena, @exp(f));
             },
@@ -4583,16 +4573,10 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @exp2(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt exp2 for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @exp2(f));
             },
             128 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt exp2 for f128");
-                }
                 const f = val.toFloat(f128);
                 return Value.Tag.float_128.create(arena, @exp2(f));
             },
@@ -4626,16 +4610,10 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @log(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt log for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @log(f));
             },
             128 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt log for f128");
-                }
                 const f = val.toFloat(f128);
                 return Value.Tag.float_128.create(arena, @log(f));
             },
@@ -4669,16 +4647,10 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @log2(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt log2 for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @log2(f));
             },
             128 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt log2 for f128");
-                }
                 const f = val.toFloat(f128);
                 return Value.Tag.float_128.create(arena, @log2(f));
             },
@@ -4712,16 +4684,10 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @log10(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt log10 for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @log10(f));
             },
             128 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt log10 for f128");
-                }
                 const f = val.toFloat(f128);
                 return Value.Tag.float_128.create(arena, @log10(f));
             },
@@ -4755,9 +4721,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @fabs(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt fabs for f80 (__fabsx)");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @fabs(f));
             },
@@ -4795,9 +4758,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @floor(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt floor for f80 (__floorx)");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @floor(f));
             },
@@ -4835,9 +4795,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @ceil(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt ceil for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @ceil(f));
             },
@@ -4875,9 +4832,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @round(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt round for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @round(f));
             },
@@ -4915,9 +4869,6 @@ pub const Value = extern union {
                 return Value.Tag.float_64.create(arena, @trunc(f));
             },
             80 => {
-                if (true) {
-                    @panic("TODO implement compiler_rt trunc for f80");
-                }
                 const f = val.toFloat(f80);
                 return Value.Tag.float_80.create(arena, @trunc(f));
             },
diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp
index 0b0b33b8d9db..78082abf8815 100644
--- a/src/zig_llvm.cpp
+++ b/src/zig_llvm.cpp
@@ -541,6 +541,10 @@ LLVMValueRef ZigLLVMBuildUShlSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRe
     return wrap(call_inst);
 }
 
+LLVMValueRef LLVMBuildVectorSplat(LLVMBuilderRef B, unsigned elem_count, LLVMValueRef V, const char *Name) {
+    return wrap(unwrap(B)->CreateVectorSplat(elem_count, unwrap(V), Name));
+}
+
 void ZigLLVMFnSetSubprogram(LLVMValueRef fn, ZigLLVMDISubprogram *subprogram) {
     assert( isa<Function>(unwrap(fn)) );
     Function *unwrapped_function = reinterpret_cast<Function*>(unwrap(fn));
diff --git a/src/zig_llvm.h b/src/zig_llvm.h
index 63d184c41703..90dcd1de3907 100644
--- a/src/zig_llvm.h
+++ b/src/zig_llvm.h
@@ -149,6 +149,7 @@ ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildSMulFixSat(LLVMBuilderRef B, LLVMValueRef
 ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildUMulFixSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *name);
 ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildUShlSat(LLVMBuilderRef builder, LLVMValueRef LHS, LLVMValueRef RHS, const char* name);
 ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildSShlSat(LLVMBuilderRef builder, LLVMValueRef LHS, LLVMValueRef RHS, const char* name);
+ZIG_EXTERN_C LLVMValueRef LLVMBuildVectorSplat(LLVMBuilderRef B, unsigned elem_count, LLVMValueRef V, const char *Name);
 
 ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildNSWShl(LLVMBuilderRef builder, LLVMValueRef LHS, LLVMValueRef RHS,
diff --git a/test/behavior/bugs/920.zig b/test/behavior/bugs/920.zig
index 380d42e5deaa..19fce71549fd 100644
--- a/test/behavior/bugs/920.zig
+++ b/test/behavior/bugs/920.zig
@@ -1,5 +1,4 @@
 const std = @import("std");
-const math = std.math;
 const Random = std.rand.Random;
 
 const ZigTable = struct {
@@ -40,10 +39,10 @@ const norm_r = 3.6541528853610088;
 const norm_v = 0.00492867323399;
 
 fn norm_f(x: f64) f64 {
-    return math.exp(-x * x / 2.0);
+    return @exp(-x * x / 2.0);
 }
 fn norm_f_inv(y: f64) f64 {
-    return math.sqrt(-2.0 * math.ln(y));
+    return @sqrt(-2.0 * @log(y));
 }
 fn norm_zero_case(random: *Random, u: f64) f64 {
     _ = random;
@@ -64,6 +63,13 @@ test "bug 920 fixed" {
     };
 
     for (NormalDist1.f) |_, i| {
-        try std.testing.expectEqual(NormalDist1.f[i], NormalDist.f[i]);
+        // Here we use `expectApproxEqAbs` instead of `expectEqual` to account for the small
+        // differences in the math functions of different libcs. For example, if the compiler
+        // links against glibc, but the target is musl libc, then these values might be
+        // slightly different.
+        // Arguably, this is a bug in the compiler because comptime should emulate the target,
+        // including rounding errors in libc math functions. However, that behavior is not
+        // what this particular test is intended to cover.
+        try std.testing.expectApproxEqAbs(NormalDist1.f[i], NormalDist.f[i], @sqrt(std.math.floatEps(f64)));
     }
 }
diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig
index 0700b47c61df..cc978f3b8d1c 100644
--- a/test/behavior/floatop.zig
+++ b/test/behavior/floatop.zig
@@ -609,16 +609,11 @@ test "negation f64" {
 }
 
 test "negation f80" {
-    if (builtin.zig_backend != .stage1) {
-        // This test case exercises @intToFloat f80 in the compiler implementation.
-        // https://github.com/ziglang/zig/issues/11030
-        return error.SkipZigTest;
-    }
-
-    if (builtin.os.tag == .freebsd) {
-        // TODO file issue to track this failure
-        return error.SkipZigTest;
-    }
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
 
     const S = struct {
         fn doTheTest() !void {
@@ -641,11 +636,6 @@ test "negation f128" {
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
 
-    if (builtin.os.tag == .freebsd) {
-        // TODO file issue to track this failure
-        return error.SkipZigTest;
-    }
-
     const S = struct {
         fn doTheTest() !void {
             var a: f128 = 1;
diff --git a/test/behavior/math.zig b/test/behavior/math.zig
index f5494adfeb61..0479015eee8c 100644
--- a/test/behavior/math.zig
+++ b/test/behavior/math.zig
@@ -6,6 +6,7 @@ const expectEqualSlices = std.testing.expectEqualSlices;
 const maxInt = std.math.maxInt;
 const minInt = std.math.minInt;
 const mem = std.mem;
+const math = std.math;
 
 test "assignment operators" {
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
@@ -909,32 +910,52 @@ test "comptime float rem int" {
 }
 
 test "remainder division" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
 
     comptime try remdiv(f16);
     comptime try remdiv(f32);
     comptime try remdiv(f64);
+    comptime try remdiv(f80);
     comptime try remdiv(f128);
     try remdiv(f16);
     try remdiv(f64);
+    try remdiv(f80);
     try remdiv(f128);
 }
 
 fn remdiv(comptime T: type) !void {
     try expect(@as(T, 1) == @as(T, 1) % @as(T, 2));
+    try remdivOne(T, 1, 1, 2);
+
+    try expect(@as(T, 1) == @as(T, 7) % @as(T, 3));
+    try remdivOne(T, 1, 7, 3);
+}
+
+fn remdivOne(comptime T: type, a: T, b: T, c: T) !void {
+    try expect(a == @rem(b, c));
+    try expect(a == @mod(b, c));
 }
 
 test "float remainder division using @rem" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
 
     comptime try frem(f16);
     comptime try frem(f32);
     comptime try frem(f64);
+    comptime try frem(f80);
     comptime try frem(f128);
     try frem(f16);
     try frem(f32);
     try frem(f64);
+    try frem(f80);
     try frem(f128);
 }
 
@@ -943,29 +964,40 @@ fn frem(comptime T: type) !void {
         f16 => 1.0,
         f32 => 0.001,
         f64 => 0.00001,
+        f80 => 0.000001,
        f128 => 0.0000001,
         else => unreachable,
     };
 
-    try expect(std.math.fabs(@rem(@as(T, 6.9), @as(T, 4.0)) - @as(T, 2.9)) < epsilon);
-    try expect(std.math.fabs(@rem(@as(T, -6.9), @as(T, 4.0)) - @as(T, -2.9)) < epsilon);
-    try expect(std.math.fabs(@rem(@as(T, -5.0), @as(T, 3.0)) - @as(T, -2.0)) < epsilon);
-    try expect(std.math.fabs(@rem(@as(T, 3.0), @as(T, 2.0)) - @as(T, 1.0)) < epsilon);
-    try expect(std.math.fabs(@rem(@as(T, 1.0), @as(T, 2.0)) - @as(T, 1.0)) < epsilon);
-    try expect(std.math.fabs(@rem(@as(T, 0.0), @as(T, 1.0)) - @as(T, 0.0)) < epsilon);
-    try expect(std.math.fabs(@rem(@as(T, -0.0), @as(T, 1.0)) - @as(T, -0.0)) < epsilon);
+    try fremOne(T, 6.9, 4.0, 2.9, epsilon);
+    try fremOne(T, -6.9, 4.0, -2.9, epsilon);
+    try fremOne(T, -5.0, 3.0, -2.0, epsilon);
+    try fremOne(T, 3.0, 2.0, 1.0, epsilon);
+    try fremOne(T, 1.0, 2.0, 1.0, epsilon);
+    try fremOne(T, 0.0, 1.0, 0.0, epsilon);
+    try fremOne(T, -0.0, 1.0, -0.0, epsilon);
+}
+
+fn fremOne(comptime T: type, a: T, b: T, c: T, epsilon: T) !void {
+    try expect(@fabs(@rem(a, b) - c) < epsilon);
 }
 
 test "float modulo division using @mod" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
 
     comptime try fmod(f16);
     comptime try fmod(f32);
     comptime try fmod(f64);
+    comptime try fmod(f80);
     comptime try fmod(f128);
     try fmod(f16);
     try fmod(f32);
     try fmod(f64);
+    try fmod(f80);
     try fmod(f128);
 }
 
@@ -974,17 +1006,22 @@ fn fmod(comptime T: type) !void {
         f16 => 1.0,
         f32 => 0.001,
         f64 => 0.00001,
+        f80 => 0.000001,
         f128 => 0.0000001,
         else => unreachable,
     };
 
-    try expect(std.math.fabs(@mod(@as(T, 6.9), @as(T, 4.0)) - @as(T, 2.9)) < epsilon);
-    try expect(std.math.fabs(@mod(@as(T, -6.9), @as(T, 4.0)) - @as(T, 1.1)) < epsilon);
-    try expect(std.math.fabs(@mod(@as(T, -5.0), @as(T, 3.0)) - @as(T, 1.0)) < epsilon);
-    try expect(std.math.fabs(@mod(@as(T, 3.0), @as(T, 2.0)) - @as(T, 1.0)) < epsilon);
-    try expect(std.math.fabs(@mod(@as(T, 1.0), @as(T, 2.0)) - @as(T, 1.0)) < epsilon);
-    try expect(std.math.fabs(@mod(@as(T, 0.0), @as(T, 1.0)) - @as(T, 0.0)) < epsilon);
-    try expect(std.math.fabs(@mod(@as(T, -0.0), @as(T, 1.0)) - @as(T, -0.0)) < epsilon);
+    try fmodOne(T, 6.9, 4.0, 2.9, epsilon);
+    try fmodOne(T, -6.9, 4.0, 1.1, epsilon);
+    try fmodOne(T, -5.0, 3.0, 1.0, epsilon);
+    try fmodOne(T, 3.0, 2.0, 1.0, epsilon);
+    try fmodOne(T, 1.0, 2.0, 1.0, epsilon);
+    try fmodOne(T, 0.0, 1.0, 0.0, epsilon);
+    try fmodOne(T, -0.0, 1.0, -0.0, epsilon);
+}
+
+fn fmodOne(comptime T: type, a: T, b: T, c: T, epsilon: T) !void {
+    try expect(@fabs(@mod(@as(T, a), @as(T, b)) - @as(T, c)) < epsilon);
 }
 
 test "@sqrt" {
@@ -1036,10 +1073,11 @@ test "@fabs" {
 }
 
 test "@fabs f80" {
-    if (true) {
-        // https://github.com/ziglang/zig/issues/11030
-        return error.SkipZigTest;
-    }
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
     try testFabs(f80, 12.0);
     comptime try testFabs(f80, 12.0);
@@ -1052,7 +1090,11 @@ fn testFabs(comptime T: type, x: T) !void {
 }
 
 test "@floor" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
     try testFloor(f64, 12.0);
     comptime try testFloor(f64, 12.0);
@@ -1068,23 +1110,24 @@ test "@floor" {
 }
 
 test "@floor f80" {
-    if (true) {
-        // https://github.com/ziglang/zig/issues/11030
-        return error.SkipZigTest;
-    }
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+
     try testFloor(f80, 12.0);
     comptime try testFloor(f80, 12.0);
 }
 
 test "@floor f128" {
-    if (builtin.zig_backend == .stage1) {
-        // Fails because it incorrectly lowers to a floorl function call.
-        return error.SkipZigTest;
-    }
-
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
-    testFloor(f128, 12.0);
+    try testFloor(f128, 12.0);
     comptime try testFloor(f128, 12.0);
 }
 
@@ -1095,7 +1138,11 @@ fn testFloor(comptime T: type, x: T) !void {
 }
 
 test "@ceil" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
     try testCeil(f64, 12.0);
     comptime try testCeil(f64, 12.0);
@@ -1111,24 +1158,24 @@ test "@ceil" {
 }
 
 test "@ceil f80" {
-    if (true) {
-        // https://github.com/ziglang/zig/issues/11030
-        return error.SkipZigTest;
-    }
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
     try testCeil(f80, 12.0);
     comptime try testCeil(f80, 12.0);
 }
 
 test "@ceil f128" {
-    if (builtin.zig_backend == .stage1) {
-        // Fails because it incorrectly lowers to a ceill function call.
-        return error.SkipZigTest;
-    }
-
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
-    testCeil(f128, 12.0);
+    try testCeil(f128, 12.0);
     comptime try testCeil(f128, 12.0);
 }
 
@@ -1139,7 +1186,11 @@ fn testCeil(comptime T: type, x: T) !void {
 }
 
 test "@trunc" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
     try testTrunc(f64, 12.0);
     comptime try testTrunc(f64, 12.0);
@@ -1155,10 +1206,11 @@ test "@trunc" {
 }
 
 test "@trunc f80" {
-    if (true) {
-        // https://github.com/ziglang/zig/issues/11030
-        return error.SkipZigTest;
-    }
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
     try testTrunc(f80, 12.0);
     comptime try testTrunc(f80, 12.0);
@@ -1171,14 +1223,13 @@ test "@trunc f80" {
 }
 
 test "@trunc f128" {
-    if (builtin.zig_backend == .stage1) {
-        // Fails because it incorrectly lowers to a truncl function call.
-        return error.SkipZigTest;
-    }
-
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
-    testTrunc(f128, 12.0);
+    try testTrunc(f128, 12.0);
     comptime try testTrunc(f128, 12.0);
 }
 
@@ -1197,7 +1248,11 @@ fn testTrunc(comptime T: type, x: T) !void {
 }
 
 test "@round" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
     try testRound(f64, 12.0);
     comptime try testRound(f64, 12.0);
@@ -1213,24 +1268,24 @@ test "@round" {
 }
 
 test "@round f80" {
-    if (true) {
-        // https://github.com/ziglang/zig/issues/11030
-        return error.SkipZigTest;
-    }
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
     try testRound(f80, 12.0);
     comptime try testRound(f80, 12.0);
 }
 
 test "@round f128" {
-    if (builtin.zig_backend == .stage1) {
-        // Fails because it incorrectly lowers to a roundl function call.
-        return error.SkipZigTest;
-    }
-
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
-    testRound(f128, 12.0);
+    try testRound(f128, 12.0);
     comptime try testRound(f128, 12.0);
 }
 
@@ -1279,17 +1334,19 @@ test "NaN comparison" {
 }
 
 test "NaN comparison f80" {
-    if (true) {
-        // https://github.com/ziglang/zig/issues/11030
-        return error.SkipZigTest;
-    }
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+
     try testNanEqNan(f80);
     comptime try testNanEqNan(f80);
 }
 
 fn testNanEqNan(comptime F: type) !void {
-    var nan1 = std.math.nan(F);
-    var nan2 = std.math.nan(F);
+    var nan1 = math.nan(F);
+    var nan2 = math.nan(F);
     try expect(nan1 != nan2);
     try expect(!(nan1 == nan2));
     try expect(!(nan1 > nan2));
@@ -1346,3 +1403,55 @@ test "signed zeros are represented properly" {
     try S.doTheTest();
     comptime try S.doTheTest();
 }
+
+test "comptime sin and ln" {
+    const v = comptime (@sin(@as(f32, 1)) + @log(@as(f32, 5)));
+    try expect(v == @sin(@as(f32, 1)) + @log(@as(f32, 5)));
+}
+
+test "fabs" {
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+
+    inline for ([_]type{ f16, f32, f64, f80, f128, c_longdouble }) |T| {
+        // normals
+        try expect(@fabs(@as(T, 1.0)) == 1.0);
+        try expect(@fabs(@as(T, -1.0)) == 1.0);
+        try expect(@fabs(math.floatMin(T)) == math.floatMin(T));
+        try expect(@fabs(-math.floatMin(T)) == math.floatMin(T));
+        try expect(@fabs(math.floatMax(T)) == math.floatMax(T));
+        try expect(@fabs(-math.floatMax(T)) == math.floatMax(T));
+
+        // subnormals
+        try expect(@fabs(@as(T, 0.0)) == 0.0);
+        try expect(@fabs(@as(T, -0.0)) == 0.0);
+        try expect(@fabs(math.floatTrueMin(T)) == math.floatTrueMin(T));
+        try expect(@fabs(-math.floatTrueMin(T)) == math.floatTrueMin(T));
+
+        // non-finite numbers
+        try expect(math.isPositiveInf(@fabs(math.inf(T))));
+        try expect(math.isPositiveInf(@fabs(-math.inf(T))));
+        try expect(math.isNan(@fabs(math.nan(T))));
+    }
+}
+
+test "absFloat" {
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+
+    try testAbsFloat();
+    comptime try testAbsFloat();
+}
+fn testAbsFloat() !void {
+    try testAbsFloatOne(-10.05, 10.05);
+    try testAbsFloatOne(10.05, 10.05);
+}
+fn testAbsFloatOne(in: f32, out: f32) !void {
+    try expect(@fabs(@as(f32, in)) == @as(f32, out));
+}
diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig
index a7e7c3b816a3..01ef8c7d297e 100644
--- a/test/behavior/muladd.zig
+++ b/test/behavior/muladd.zig
@@ -32,6 +32,7 @@ test "@mulAdd f16" {
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+
     comptime try testMulAdd16();
     try testMulAdd16();
 }
@@ -44,10 +45,12 @@ fn testMulAdd16() !void {
 }
 
 test "@mulAdd f80" {
-    if (true) {
-        // https://github.com/ziglang/zig/issues/11030
-        return error.SkipZigTest;
-    }
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage1) return error.SkipZigTest;
 
     comptime try testMulAdd80();
     try testMulAdd80();
@@ -173,10 +176,12 @@ fn vector80() !void {
 }
 
 test "vector f80" {
-    if (true) {
-        // https://github.com/ziglang/zig/issues/11030
-        return error.SkipZigTest;
-    }
+    if (builtin.zig_backend == .stage1) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
 
     comptime try vector80();
     try vector80();