Under the `translator` module of C2Rust Transpile (main_function.rs + simd.rs)

Retriever
7 min read

//! This module provides translation for SIMD operations and expressions.

simd.rs

  1. pub fn convert_shuffle_vector: 此公共函数的目标是将 shuffle 操作转换为相应的 Rust SIMD 库调用。该函数首先检查输入参数的个数是否符合预期。如果不符合预期,函数将返回错误。接着,它对两个输入向量进行处理,如果这两个向量的类型或长度不同,同样会返回错误。最后,根据各种条件选择合适的 shuffle 函数,然后调用它。

  2. fn strip_vector_explicit_cast: 这个私有函数的目标是移除显式的向量转换。在一些情况下,向量在内部类型之间转换可能导致问题,尤其是在 shuffle 向量的情况下,这些 shuffle 向量通常是在内建调用上的宏。这个函数就是去除这些转换以简化代码和提高可读性。

  3. fn get_shuffle_vector_mask: 这个私有函数的目标是从 shuffle 向量的“超级内建”调用中获取 mask 表达式。这些表达式包含应用于 mask 表达式的数学偏移(或者是数值常量),我们希望提取这个 mask。

  4. pub fn casting_simd_builtin_call: 这个公共函数的目标是确定是否对 SIMD 调用值进行了转换,因为内建定义会为我们的目的添加多余的转换。函数检查表达式的类型,如果表达式是对 SIMD 的调用并且进行了显式转换,那么函数将返回 true,否则返回 false。

#![deny(missing_docs)]
//! This module provides translation for SIMD operations and expressions.

// Various imports
use super::*;
use crate::c_ast::BinOp::{Add, BitAnd, ShiftRight};
use crate::c_ast::CExprKind::{Binary, Call, Conditional, ExplicitCast, ImplicitCast, Literal};
use crate::c_ast::CLiteral::Integer;
use crate::c_ast::CTypeKind::{Char, Double, Float, Int, LongLong, Short};
use crate::c_ast::CastKind::{BitCast, IntegralCast};

// SIMD functions (by C intrinsic name) which were not available in Rust as of version 1.29.
// `import_simd_function` returns an error for any name found in this list.
// NOTE(review): the entries are elided (`//...`) in this excerpt.
static MISSING_SIMD_FUNCTIONS: [&str; 36] = [
    //...
];

// SIMD functions that are specific to the x86_64 architecture.
// `import_simd_function` skips the `core::arch::x86` import for names in this list
// and only emits the `core::arch::x86_64` one.
// NOTE(review): the entries are elided (`//...`) in this excerpt.
static SIMD_X86_64_ONLY: &[&str] = &[
    //...
];

// Function that adds a use statement for a given item within a specific architecture
/// Registers a `use core::arch::<arch_name>::<item_name>` in `store`, guarded
/// by a `#[cfg(target_arch = "<arch_name>")]` outer attribute.
fn add_arch_use(store: &mut ItemStore, arch_name: &str, item_name: &str) {
    // Module the item is imported from: `core::arch::<arch_name>`.
    let module_path = vec!["core".into(), "arch".into(), arch_name.into()];

    // `cfg(target_arch = "<arch_name>")`, so the import only exists on that target.
    let arch_predicate = NestedMeta::Meta(mk().meta_namevalue("target_arch", arch_name));
    let cfg_list = mk().meta_list("cfg", vec![arch_predicate]);

    // The attribute builder is additionally marked `pub` before being handed over.
    let use_builder = mk().meta_item_attr(AttrStyle::Outer, cfg_list).pub_();

    store.add_use_with_attr(module_path, item_name, use_builder);
}

// This is the primary implementation for the translation of SIMD in the code
impl<'c> Translation<'c> {
    /// Reports whether `name` is the name of a SIMD typedef.
    ///
    /// For the public `__m128*` / `__m256*` types, `use` statements for both the
    /// `x86` and `x86_64` architectures are added to the current file's item
    /// store. Compiler-internal vector typedefs (`__v1di`, ...) are recognised
    /// but need no import.
    ///
    /// # Errors
    ///
    /// Returns an error for `__m64`, because MMX support was removed from
    /// upstream Rust.
    pub fn import_simd_typedef(&self, name: &str) -> TranslationResult<bool> {
        let is_simd_typedef = match name {
            // __m64 and MMX support were removed from upstream Rust.
            "__m64" => {
                return Err(format_err!(
                    "__m64 and MMX are no longer supported, due to removed upstream support. See https://github.com/immunant/c2rust/issues/369"
                ).into());
            }
            // Known public SIMD typedefs: import them for both architectures.
            "__m128i" | "__m128" | "__m128d" | "__m256" | "__m256d" | "__m256i" => {
                self.with_cur_file_item_store(|item_store| {
                    for arch in &["x86", "x86_64"] {
                        add_arch_use(item_store, arch, name);
                    }
                });

                true
            }
            // These are C internal types and don't need explicit support
            "__v1di"
            | "__v2si"
            | "__v4hi"
            //...
            => true,
            _ => false,
        };

        Ok(is_simd_typedef)
    }
}

    /// Determines whether `name` is a SIMD primitive. If it is, the matching
    /// `use` statements are generated and `true` is returned, so the caller
    /// needs no further processing.
    ///
    /// # Errors
    ///
    /// Returns an error when `name` is listed in `MISSING_SIMD_FUNCTIONS`.
    pub fn import_simd_function(&self, name: &str) -> TranslationResult<bool> {
        // Only names with the `_mm` prefix are treated as SIMD primitives.
        if !name.starts_with("_mm") {
            return Ok(false);
        }

        // REVIEW: This does a linear lookup against all SIMD fns. Could use a lazy static hashset
        let has_no_rust_counterpart = MISSING_SIMD_FUNCTIONS.contains(&name);
        if has_no_rust_counterpart {
            return Err(format_err!(
                "SIMD 函数 {} 目前没有对应的 Rust 函数",
                name
            )
            .into());
        }

        // Most x86/64 SIMD is stable, but some of it is still behind a feature gate.
        self.use_feature("stdsimd");

        // REVIEW: Also a linear lookup
        let x86_64_only = SIMD_X86_64_ONLY.contains(&name);
        self.with_cur_file_item_store(|item_store| {
            if !x86_64_only {
                add_arch_use(item_store, "x86", name);
            }

            add_arch_use(item_store, "x86_64", name);
        });

        Ok(true)
    }

    /// Strips an implicit integral cast or an explicit vector bit-cast from a
    /// parameter expression, since both casts are unnecessary (and potentially
    /// problematic) for our purposes. Any other expression is returned as-is.
    fn clean_int_or_vector_param(&self, expr_id: CExprId) -> CExprId {
        match self.ast_context[expr_id].kind {
            // For some reason there seems to be an incorrect implicit cast to char here;
            // possibly the builtin takes a char even though the function takes an int
            ImplicitCast(_, uncast_id, IntegralCast, _, _) => uncast_id,
            // (internal)(external)(vector input)
            ExplicitCast(qty, _, BitCast, _, _)
                if matches!(
                    self.ast_context.resolve_type(qty.ctype).kind,
                    CTypeKind::Vector(..)
                ) =>
            {
                // The middle element of the tuple is the stripped expression id.
                self.strip_vector_explicit_cast(expr_id).1
            }
            _ => expr_id,
        }
    }

    /// 基于内置函数生成对 Rust SIMD 函数的调用。Clang 6 只支持其中一个,
    /// 但 Clang 7 转换了更多的 "super builtins"
    pub fn convert_simd_builtin(
        &self,
        ctx: ExprContext,
        fn_name: &str,
        args: &[CExprId],
    ) -> TranslationResult<WithStmts<Box<Expr>>> {
        self.import_simd_function(fn_name)?;

        let mut processed_args = vec![];
        let (_, first_expr_id, _) = self.strip_vector_explicit_cast(args[0]);
        processed_args.push(first_expr_id);
        processed_args.extend(
            args[1..]
                .iter()
                .map(|arg| self.clean_int_or_vector_param(*arg)),
        );

        let param_translation = self.convert_exprs(ctx.used(), &processed_args)?;
        param_translation.and_then(|call_params| {
            let call = mk().call_expr(mk().ident_expr(fn_name), call_params);

            if ctx.is_used() {
                Ok(WithStmts::new_val(call))
            } else {
                Ok(WithStmts::new(
                    vec![mk().semi_stmt(call)],
                    self.panic_or_err("No value for unused shuffle vector return"),
                ))
            }
        })
    }

    /// Generates a zero value used to initialize the given vector type. The
    /// type is specified by the underlying element type and the number of
    /// elements in the vector.
    ///
    /// In a static context the value is produced by transmuting a `[0u8; N]`
    /// array; otherwise a call to the matching `_mm*_setzero_*` function is
    /// emitted.
    ///
    /// # Errors
    ///
    /// Returns an error for unsupported element type / length combinations, or
    /// when the selected `setzero` function cannot be imported.
    pub fn implicit_vector_default(
        &self,
        ctype: CTypeId,
        len: usize,
        is_static: bool,
    ) -> TranslationResult<WithStmts<Box<Expr>>> {
        // NOTE: This is only for x86/_64, so other architectures may need some
        // sort of disambiguation on export
        let (fn_name, bytes) = match (&self.ast_context[ctype].kind, len) {
            (Float, 4) => ("_mm_setzero_ps", 16),
            (Float, 8) => ("_mm256_setzero_ps", 32),
            (Double, 2) => ("_mm_setzero_pd", 16),
            (Double, 4) => ("_mm256_setzero_pd", 32),
            (Char, 16) | (Int, 4) | (LongLong, 2) => ("_mm_setzero_si128", 16),
            (Char, 32) | (Int, 8) | (LongLong, 4) => ("_mm256_setzero_si256", 32),
            (Char, 8) | (Int, 2) | (LongLong, 1) => {
                // __m64 is still unstable as of rust 1.29
                self.use_feature("stdsimd");

                ("_mm_setzero_si64", 8)
            }
            (kind, len) => {
                return Err(format_err!(
                    "不支持的向量默认初始化器: {:?} x {}",
                    kind,
                    len
                )
                .into())
            }
        };

        if is_static {
            // Statics cannot call the (non-const) setzero functions, so build
            // `transmute([0u8; bytes])` instead.
            let zero_expr = mk().lit_expr(mk().int_lit(0, "u8"));
            let n_bytes_expr = mk().lit_expr(mk().int_lit(bytes, ""));
            let expr = mk().repeat_expr(zero_expr, n_bytes_expr);

            Ok(WithStmts::new_unsafe_val(transmute_expr(
                mk().infer_ty(),
                mk().infer_ty(),
                expr,
            )))
        } else {
            // Propagate an unsupported-function error to the caller rather than
            // aborting the whole translation with a panic.
            self.import_simd_function(fn_name)?;

            Ok(WithStmts::new_val(
                mk().call_expr(mk().ident_expr(fn_name), Vec::new()),
            ))
        }
    }

    /// Translates a list initializer for the corresponding vector type.
    ///
    /// In a static context the elements are packed into a tuple and
    /// transmuted; otherwise a call to the matching `_mm*_setr_*` function is
    /// emitted. When the result is unused, the call is emitted as a
    /// semicolon-terminated statement.
    ///
    /// # Errors
    ///
    /// Returns an error when translating an element fails, when the element
    /// type / length combination is unknown, or when the selected function
    /// cannot be imported.
    pub fn vector_list_initializer(
        &self,
        ctx: ExprContext,
        ids: &[CExprId],
        ctype: CTypeId,
        len: usize,
    ) -> TranslationResult<WithStmts<Box<Expr>>> {
        let param_translation = self.convert_exprs(ctx, ids)?;
        param_translation.and_then(|mut params| {
            // When used in a static, we cannot call the standard functions since they
            // are not const, so we are forced to transmute
            let call = if ctx.is_static {
                let tuple = mk().tuple_expr(params);

                transmute_expr(mk().infer_ty(), mk().infer_ty(), tuple)
            } else {
                let fn_call_name = match (&self.ast_context[ctype].kind, len) {
                    (Float, 4) => "_mm_setr_ps",
                    (Float, 8) => "_mm256_setr_ps",
                    (Double, 2) => "_mm_setr_pd",
                    (Double, 4) => "_mm256_setr_pd",
                    (LongLong, 2) => "_mm_set_epi64x",
                    (LongLong, 4) => "_mm256_setr_epi64x",
                    (Char, 8) => "_mm_setr_pi8",
                    (Char, 16) => "_mm_setr_epi8",
                    (Char, 32) => "_mm256_setr_epi8",
                    (Int, 2) => "_mm_setr_pi32",
                    (Int, 4) => "_mm_setr_epi32",
                    (Int, 8) => "_mm256_setr_epi32",
                    (Short, 4) => "_mm_setr_pi16",
                    (Short, 8) => "_mm_setr_epi16",
                    (Short, 16) => "_mm256_setr_epi16",
                    e => return Err(format_err!("未知的向量初始化列表: {:?}", e).into()),
                };

                self.import_simd_function(fn_call_name)?;

                // rust is missing support for _mm_setr_epi64x, so we have to use
                // reversed arguments with _mm_set_epi64x
                if fn_call_name == "_mm_set_epi64x" {
                    params.reverse();
                }

                mk().call_expr(mk().ident_expr(fn_call_name), params)
            };

            if ctx.is_used() {
                Ok(WithStmts::new_val(call))
            } else {
                // Emit the discarded call with a trailing semicolon, matching
                // `convert_simd_builtin`; without it the call would not be a
                // well-formed statement when followed by other statements.
                Ok(WithStmts::new(
                    vec![mk().semi_stmt(call)],
                    self.panic_or_err("未使用的 shuffle vector 返回值没有值"),
                ))
            }
        })
    }

  /// Translates a shuffle-vector expression, given the ids of its child expressions.
  ///
  /// NOTE(review): the body is elided in this excerpt. The accompanying article
  /// text says it validates the argument count and the two input vectors, then
  /// selects and calls a matching shuffle function — confirm against the full source.
  pub fn convert_shuffle_vector(
        &self,
        ctx: ExprContext,
        child_expr_ids: &[CExprId],
    ) -> TranslationResult<WithStmts<Box<Expr>>> {
// (body elided in this excerpt)
   }

 /// Strips explicit vector casts from `expr_id`.
 ///
 /// Callers in this file use the middle element of the returned tuple as the
 /// id of the stripped expression.
 /// NOTE(review): the body is elided in this excerpt; presumably the other two
 /// elements are the vector's element type kind and its length — confirm
 /// against the full source.
 fn strip_vector_explicit_cast(&self, expr_id: CExprId) -> (&CTypeKind, CExprId, usize) {
}

main_function.rs

这个模块是用于将C语言的main函数翻译为Rust的。

在该模块中,首先导入了所需的模块和库。

然后定义了一个名为Translation的结构体的实现(impl)。在这个实现中,定义了一个名为convert_main的方法,该方法用于将C语言的main函数转换为Rust的等价表示。

该方法接收一个参数,代表C语言main函数的ID,然后尝试提取其参数、类型等信息,进行相应的转换。

如果成功获取了main函数的信息,则将其转换为Rust中的函数,并生成对应的代码。代码生成过程包括构建函数声明、生成函数体以及处理main函数的参数和返回值。

具体地,代码考虑了main函数的三种形式:无参数、两个参数(argc、argv)以及三个参数(argc、argv和envp)。对于每种形式,都生成了相应的Rust代码。

如果main函数的返回值类型为void,则在Rust中将其视为返回0的函数。否则,会将main函数的返回值转换为i32类型,并通过std::process::exit函数返回。

如果在提取main函数信息的过程中发生错误,或者main函数不是函数类型(例如是一个变量或类型定义),则返回一个包含错误信息的结果。

总的来说,这个模块的目标是在尽可能保持原始语义的情况下,将C语言的main函数翻译为Rust。

use super::*;
use failure::format_err;
use proc_macro2::{TokenStream, TokenTree};

impl<'c> Translation<'c> {
    pub fn convert_main(&self, main_id: CDeclId) -> TranslationResult<Box<Item>> {
        if let CDeclKind::Function {
            ref parameters,
            typ,
            ..
        } = self.ast_context.index(main_id).kind
        {
            let ret: CTypeKind = match self.ast_context.resolve_type(typ).kind {
                CTypeKind::Function(ret, _, _, _, _) => {
                    self.ast_context.resolve_type(ret.ctype).kind.clone()
                }
                ref k => {
                    return Err(format_err!(
                        "Type of main function {:?} was not a function type, got {:?}",
                        main_id,
                        k
                    )
                    .into())
                }
            };

            let main_fn_name = self
                .renamer
                .borrow()
                .get(&main_id)
                .expect("Could not find main function in renamer");

            let decl = mk().fn_decl("main", vec![], None, ReturnType::Default);

            let main_fn = mk().path_expr(vec![main_fn_name]);

            let exit_fn = mk().abs_path_expr(vec!["std", "process", "exit"]);
            let args_fn = mk().abs_path_expr(vec!["std", "env", "args"]);
            let vars_fn = mk().abs_path_expr(vec!["std", "env", "vars"]);

            let no_args: Vec<Box<Expr>> = vec![];

            let mut stmts: Vec<Stmt> = vec![];
            let mut main_args: Vec<Box<Expr>> = vec![];

            let n = parameters.len();

            if n >= 2 {
                // `argv` and `argc`

                stmts.push(mk().local_stmt(Box::new(mk().local(
                    mk().mutbl().ident_pat("args"),
                    Some(mk().path_ty(vec![mk().path_segment_with_args(
                        "Vec",
                        mk().angle_bracketed_args(vec![
                            mk().mutbl().ptr_ty(mk().path_ty(vec!["libc", "c_char"])),
                        ]),
                    )])),
                    Some(mk().call_expr(mk().path_expr(vec!["Vec", "new"]), vec![])),
                ))));
                stmts.push(mk().semi_stmt(mk().for_expr(
                    mk().ident_pat("arg"),
                    mk().call_expr(args_fn, vec![]),
                    mk().block(vec![mk().semi_stmt(mk().method_call_expr(
                        mk().path_expr(vec!["args"]),
                        "push",
                        vec![mk().method_call_expr(
                            mk().method_call_expr(
                                mk().call_expr(
                                    // TODO(kkysen) change `"std"` to `"alloc"` after `#![feature(alloc_c_string)]` is stabilized in `1.63.0`
                                    mk().abs_path_expr(vec!["std", "ffi", "CString", "new"]),
                                    vec![mk().path_expr(vec!["arg"])],
                                ),
                                "expect",
                                vec![mk().lit_expr("Failed to convert argument into CString.")],
                            ),
                            "into_raw",
                            vec![],
                        )],
                    ))]),
                    None::<Ident>,
                )));
                stmts.push(mk().semi_stmt(mk().method_call_expr(
                    mk().path_expr(vec!["args"]),
                    "push",
                    vec![
                        mk().call_expr(mk().abs_path_expr(vec!["core", "ptr", "null_mut"]), vec![]),
                    ],
                )));

                let argc_ty: Box<Type> = match self.ast_context.index(parameters[0]).kind {
                    CDeclKind::Variable { ref typ, .. } => self.convert_type(typ.ctype),
                    _ => Err(TranslationError::generic(
                        "Cannot find type of 'argc' argument in main function",
                    )),
                }?;
                let argv_ty: Box<Type> = match self.ast_context.index(parameters[1]).kind {
                    CDeclKind::Variable { ref typ, .. } => self.convert_type(typ.ctype),
                    _ => Err(TranslationError::generic(
                        "Cannot find type of 'argv' argument in main function",
                    )),
                }?;

                let args = mk().ident_expr("args");
                let argc = mk().binary_expr(
                    BinOp::Sub(Default::default()),
                    mk().method_call_expr(args.clone(), "len", no_args.clone()),
                    mk().lit_expr(mk().int_lit(1, "")),
                );
                let argv = mk().method_call_expr(args, "as_mut_ptr", no_args.clone());

                main_args.push(mk().cast_expr(argc, argc_ty));
                main_args.push(mk().cast_expr(argv, argv_ty));
            }

            if n >= 3 {
                // non-standard `envp`

                stmts.push(mk().local_stmt(Box::new(mk().local(
                    mk().mutbl().ident_pat("vars"),
                    Some(mk().path_ty(vec![mk().path_segment_with_args(
                        "Vec",
                        mk().angle_bracketed_args(vec![
                            mk().mutbl().ptr_ty(mk().path_ty(vec!["libc", "c_char"])),
                        ]),
                    )])),
                    Some(mk().call_expr(mk().path_expr(vec!["Vec", "new"]), vec![])),
                ))));
                let var_name_ident = mk().ident("var_name");
                let var_value_ident = mk().ident("var_value");
                stmts.push(mk().semi_stmt(mk().for_expr(
                    mk().tuple_pat(vec![
                        mk().ident_pat("var_name"),
                        mk().ident_pat("var_value"),
                    ]),
                    mk().call_expr(vars_fn, vec![]),
                    mk().block(vec![
                        mk().local_stmt(Box::new(
                            mk().local(
                                mk().ident_pat("var"),
                                Some(mk().path_ty(vec!["String"])),
                                Some(
                                    mk().mac_expr(
                                        mk().mac(
                                            mk().path(vec!["format"]),
                                            vec![
                                                TokenTree::Literal(
                                                    proc_macro2::Literal::string("{}={}"),
                                                ),
                                                TokenTree::Punct(Punct::new(
                                                    ',',
                                                    proc_macro2::Spacing::Alone,
                                                )),
                                                TokenTree::Ident(var_name_ident),
                                                TokenTree::Punct(Punct::new(
                                                    ',',
                                                    proc_macro2::Spacing::Alone,
                                                )),
                                                TokenTree::Ident(var_value_ident),
                                            ]
                                            .into_iter()
                                            .collect::<TokenStream>(),
                                            MacroDelimiter::Paren(Default::default()),
                                        ),
                                    ),
                                ),
                            ),
                        )),
                        mk().semi_stmt(mk().method_call_expr(
                            mk().path_expr(vec!["vars"]),
                            "push",
                            vec![mk().method_call_expr(
                                mk().method_call_expr(
                                    mk().call_expr(
                                        mk().abs_path_expr(vec![
                                            // TODO(kkysen) change `"std"` to `"alloc"` after `#![feature(alloc_c_string)]` is stabilized in `1.63.0`
                                            "std", "ffi", "CString", "new",
                                        ]),
                                        vec![mk().path_expr(vec!["var"])],
                                    ),
                                    "expect",
                                    vec![mk().lit_expr(
                                    "Failed to convert environment variable into CString."
                                )],
                                ),
                                "into_raw",
                                vec![],
                            )],
                        )),
                    ]),
                    None as Option<Ident>,
                )));
                stmts.push(mk().semi_stmt(mk().method_call_expr(
                    mk().path_expr(vec!["vars"]),
                    "push",
                    vec![
                        mk().call_expr(mk().abs_path_expr(vec!["core", "ptr", "null_mut"]), vec![]),
                    ],
                )));

                let envp_ty: Box<Type> = match self.ast_context.index(parameters[2]).kind {
                    CDeclKind::Variable { ref typ, .. } => self.convert_type(typ.ctype),
                    _ => Err(TranslationError::generic(
                        "Cannot find type of 'envp' argument in main function",
                    )),
                }?;

                let envp = mk().method_call_expr(mk().ident_expr("vars"), "as_mut_ptr", no_args);

                main_args.push(mk().cast_expr(envp, envp_ty));
            }

            // Check `main` has the right form
            if n != 0 && n != 2 && n != 3 {
                return Err(format_err!(
                    "Main function should have 0, 2, or 3 parameters, not {}.",
                    n
                )
                .into());
            };

            if let CTypeKind::Void = ret {
                let call_main = mk().call_expr(main_fn, main_args);
                let unsafe_block = mk().unsafe_block(vec![mk().expr_stmt(call_main)]);

                stmts.push(mk().expr_stmt(mk().unsafe_block_expr(unsafe_block)));

                let exit_arg = mk().lit_expr(mk().int_lit(0, "i32"));
                let call_exit = mk().call_expr(exit_fn, vec![exit_arg]);

                stmts.push(mk().semi_stmt(call_exit));
            } else {
                let call_main = mk().cast_expr(
                    mk().call_expr(main_fn, main_args),
                    mk().path_ty(vec!["i32"]),
                );

                let call_exit = mk().call_expr(exit_fn, vec![call_main]);
                let unsafe_block = mk().unsafe_block(vec![mk().expr_stmt(call_exit)]);

                stmts.push(mk().expr_stmt(mk().unsafe_block_expr(unsafe_block)));
            };

            let block = mk().block(stmts);
            Ok(mk().pub_().fn_item(decl, block))
        } else {
            Err(TranslationError::generic(
                "Cannot translate non-function main entry point",
            ))
        }
    }
}
0
Subscribe to my newsletter

Read articles from Retriever directly inside your inbox. Subscribe to the newsletter, and don't miss out.

Written by

Retriever
Retriever