# Example script: fetch a prebuilt activation kernel from the Hugging Face Hub
# and apply its `gelu_fast` op to a random half-precision tensor on the GPU.
# Requires the `kernels` package, network access to the Hub, and a CUDA device
# (the input tensor is allocated with device="cuda").

import torch

from kernels import get_kernel

# Download optimized kernels from the Hugging Face hub
activation = get_kernel("kernels-community/activation")

# Random tensor
x = torch.randn((10, 10), dtype=torch.float16, device="cuda")

# Run the kernel
# `y` is allocated with the same shape/dtype/device as `x` and passed first;
# presumably `gelu_fast(out, input)` writes its result into it in place
# (NOTE(review): confirm argument order against the kernel's documentation).
y = torch.empty_like(x)
activation.gelu_fast(y, x)

print(y)