// activation/torch-ext/torch_binding.h
#pragma once
#include <torch/torch.h>
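
// C++ declarations for the activation/normalization kernels exposed through
// the torch extension. The semantic notes below are assumptions inferred from
// the signatures and from standard RMSNorm / PolyNorm formulations; verify
// them against the kernel sources.

// PolyNorm forward: fills the pre-allocated `out`, presumably with
//   out = weight[0] * rmsnorm(input^3) + weight[1] * rmsnorm(input^2)
//       + weight[2] * rmsnorm(input) + bias,
// normalized over the last dimension with epsilon `eps`.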
void poly_norm(torch::Tensor &out, const torch::Tensor &input,
               const torch::Tensor &weight, const torch::Tensor &bias,
               double eps);
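
// PolyNorm backward: given `output_grad` and the saved forward `input` and
// `weight`, fills `input_grad`, `weight_grad`, and `bias_grad`.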
void poly_norm_backward(torch::Tensor &input_grad, torch::Tensor &weight_grad,
                        torch::Tensor &bias_grad,
                        const torch::Tensor &output_grad,
                        const torch::Tensor &input, const torch::Tensor &weight,
                        double eps);
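
// RMSNorm forward (standard semantics assumed):
//   out = input / sqrt(mean(input^2, last dim) + eps) * weight.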
void rms_norm(torch::Tensor &out, const torch::Tensor &input,
              const torch::Tensor &weight, double eps);
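
// RMSNorm backward: fills `input_grad` and `weight_grad` from `output_grad`
// and the saved forward `input` and `weight`.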
void rms_norm_backward(torch::Tensor &input_grad, torch::Tensor &weight_grad,
                       const torch::Tensor &output_grad,
                       const torch::Tensor &input, const torch::Tensor &weight,
                       double eps);
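
// Fused multiply + PolyNorm forward: fuses an elementwise multiplication with
// `mul` into the PolyNorm computation. Whether the multiply is applied before
// or after normalization is not evident from the signature alone.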
void fused_mul_poly_norm(torch::Tensor &out, const torch::Tensor &input,
                         const torch::Tensor &mul, const torch::Tensor &weight,
                         const torch::Tensor &bias, double eps);
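
// Backward of the fused multiply + PolyNorm: fills gradients for the input,
// the multiplier, the weight, and the bias from `output_grad` and the saved
// forward tensors.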
void fused_mul_poly_norm_backward(
    torch::Tensor &input_grad, torch::Tensor &mul_grad,
    torch::Tensor &weight_grad, torch::Tensor &bias_grad,
    const torch::Tensor &output_grad, const torch::Tensor &input,
    const torch::Tensor &mul, const torch::Tensor &weight,
    const torch::Tensor &bias, double eps);
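
// Fused residual-add + RMSNorm forward: presumably computes
//   add_out = input + residual;  out = rmsnorm(add_out) * weight,
// returning both the normalized output and the pre-normalization sum.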
void fused_add_rms_norm(torch::Tensor &out, torch::Tensor &add_out,
                        const torch::Tensor &input,
                        const torch::Tensor &residual,
                        const torch::Tensor &weight, double eps);
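
// Backward of the fused add + RMSNorm: fills `input_grad` and `weight_grad`
// from the gradients of both forward outputs (`output_grad` for the
// normalized result, `add_output_grad` for the residual sum) and the saved
// `input` and `weight`.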
void fused_add_rms_norm_backward(torch::Tensor &input_grad,
                                 torch::Tensor &weight_grad,
                                 const torch::Tensor &output_grad,
                                 const torch::Tensor &add_output_grad,
                                 const torch::Tensor &input,
                                 const torch::Tensor &weight, double eps);
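
// Usage sketch (calling convention assumed: outputs are pre-allocated by the
// caller and written in place, matching the non-const reference parameters):
//
//   torch::Tensor x = torch::randn({8, 1024}, torch::kCUDA);
//   torch::Tensor w = torch::ones({1024}, torch::kCUDA);
//   torch::Tensor y = torch::empty_like(x);
//   rms_norm(y, x, w, /*eps=*/1e-6);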