From a2cf7ebb989f23f62913f4af590789f3349a8e21 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Wed, 29 Nov 2023 20:36:37 -0800 Subject: [PATCH] Use sml-file, warn on .sig/.fun not containing 1 --- Cargo.lock | 13 +++ crates/cm-syntax/Cargo.toml | 1 + crates/cm-syntax/src/lower.rs | 2 +- crates/cm-syntax/src/types.rs | 15 ++- crates/input/Cargo.toml | 1 + crates/input/src/lower_cm.rs | 10 +- crates/input/src/lower_mlb.rs | 4 +- crates/input/src/topo.rs | 2 +- crates/mlb-hir/Cargo.toml | 1 + crates/mlb-hir/src/lib.rs | 4 +- crates/mlb-statics/Cargo.toml | 1 + crates/mlb-statics/src/lib.rs | 14 +-- crates/mlb-statics/src/std_basis.rs | 4 +- crates/mlb-syntax/Cargo.toml | 1 + crates/mlb-syntax/src/parse.rs | 7 +- crates/mlb-syntax/src/types.rs | 2 +- crates/sml-dynamics-tests/Cargo.toml | 1 + crates/sml-dynamics-tests/src/lib.rs | 2 +- crates/sml-file-syntax/Cargo.toml | 1 + crates/sml-file-syntax/src/lib.rs | 13 ++- crates/sml-file/Cargo.toml | 10 ++ crates/sml-file/src/lib.rs | 39 ++++++++ crates/sml-hir-lower/Cargo.toml | 1 + crates/sml-hir-lower/src/dec.rs | 55 ++++++++++- crates/sml-hir-lower/src/root.rs | 4 +- crates/sml-hir-lower/src/util.rs | 27 +++++- crates/tests/Cargo.toml | 1 + crates/tests/src/check.rs | 2 +- crates/tests/src/docs.rs | 6 +- crates/tests/src/input/cm/syntax.rs | 23 +++-- crates/tests/src/lib.rs | 1 + crates/tests/src/sig_fun_file.rs | 140 +++++++++++++++++++++++++++ docs/ARCHITECTURE.md | 4 + docs/CHANGELOG.md | 5 + docs/diagnostics/4037.md | 8 ++ 35 files changed, 368 insertions(+), 57 deletions(-) create mode 100644 crates/sml-file/Cargo.toml create mode 100644 crates/sml-file/src/lib.rs create mode 100644 crates/tests/src/sig_fun_file.rs create mode 100644 docs/diagnostics/4037.md diff --git a/Cargo.lock b/Cargo.lock index 3067bb2c3..f0f153f29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,6 +136,7 @@ dependencies = [ "lex-util", "paths", "slash-var-path", + "sml-file", "str-util", "text-size-util", ] @@ -427,6 +428,7 @@ dependencies = [ "mlb-syntax", "paths", "slash-var-path", + "sml-file", "sml-file-syntax", "sml-fixity", "sml-namespace", @@ -606,6 +608,7 @@ version = "0.13.5" dependencies = [ "fast-hash", "paths", + "sml-file", "sml-namespace", "str-util", "text-size-util", @@ -621,6 +624,7 @@ dependencies = [ "mlb-hir", "paths", "sml-comment", + "sml-file", "sml-file-syntax", "sml-fixity", "sml-hir", @@ -642,6 +646,7 @@ dependencies = [ "lex-util", "paths", "slash-var-path", + "sml-file", "sml-namespace", "str-util", "text-size-util", @@ -954,6 +959,7 @@ dependencies = [ "config", "pretty_assertions", "sml-dynamics", + "sml-file", "sml-file-syntax", "sml-fixity", "sml-hir", @@ -963,12 +969,17 @@ dependencies = [ "str-util", ] +[[package]] +name = "sml-file" +version = "0.13.5" + [[package]] name = "sml-file-syntax" version = "0.13.5" dependencies = [ "config", "elapsed", + "sml-file", "sml-fixity", "sml-hir-lower", "sml-lex", @@ -1006,6 +1017,7 @@ dependencies = [ "diagnostic", "fast-hash", "lex-util", + "sml-file", "sml-hir", "sml-path", "sml-syntax", @@ -1229,6 +1241,7 @@ dependencies = [ "pulldown-cmark", "serde_json", "slash-var-path", + "sml-file", "sml-syntax", "str-util", "text-pos", diff --git a/crates/cm-syntax/Cargo.toml b/crates/cm-syntax/Cargo.toml index 71de96822..a2b68f2f1 100644 --- a/crates/cm-syntax/Cargo.toml +++ b/crates/cm-syntax/Cargo.toml @@ -16,3 +16,4 @@ text-size-util.workspace = true lex-util.path = "../lex-util" slash-var-path.path = "../slash-var-path" +sml-file.path = "../sml-file" diff --git a/crates/cm-syntax/src/lower.rs b/crates/cm-syntax/src/lower.rs index 71134a98a..3797a9cb3 100644 --- a/crates/cm-syntax/src/lower.rs +++ b/crates/cm-syntax/src/lower.rs @@ -15,7 +15,7 @@ pub(crate) fn get(root: ParseRoot) -> Result { }; let kind = match cls { Some(class) => match class.val { - Class::Sml => PathKind::Sml, + Class::Sml(k) => PathKind::Sml(k), Class::Cm => PathKind::Cm, Class::Other(s) => { return Err(Error::new(ErrorKind::UnsupportedClass(path, s), class.range)) diff --git a/crates/cm-syntax/src/types.rs b/crates/cm-syntax/src/types.rs index 68160db5b..fcc413d55 100644 --- a/crates/cm-syntax/src/types.rs +++ b/crates/cm-syntax/src/types.rs @@ -121,7 +121,7 @@ pub enum CmFileKind { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum PathKind { /// SML files. - Sml, + Sml(sml_file::Kind), /// CM files. Cm, } @@ -220,7 +220,7 @@ impl Member { #[derive(Debug, Clone)] pub enum Class { /// SML files. - Sml, + Sml(sml_file::Kind), /// CM files. Cm, /// Some other class. @@ -229,11 +229,8 @@ pub enum Class { impl Class { fn from_path(path: &Path) -> Option { - let ret = match path.extension()?.to_str()? { - "sig" | "sml" | "fun" => Self::Sml, - "cm" => Self::Cm, - _ => return None, - }; + let ext = path.extension()?.to_str()?; + let ret = if ext == "cm" { Self::Cm } else { Self::Sml(ext.parse().ok()?) }; Some(ret) } } @@ -243,7 +240,7 @@ impl FromStr for Class { fn from_str(s: &str) -> Result { let ret = match s.to_ascii_lowercase().as_str() { - "sml" => Self::Sml, + "sml" => Self::Sml(sml_file::Kind::Sml), "cm" | "cmfile" => Self::Cm, s => Self::Other(s.to_owned()), }; @@ -254,7 +251,7 @@ impl FromStr for Class { impl fmt::Display for Class { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Class::Sml => f.write_str("sml"), + Class::Sml(_) => f.write_str("sml"), Class::Cm => f.write_str("cm"), Class::Other(s) => f.write_str(s), } diff --git a/crates/input/Cargo.toml b/crates/input/Cargo.toml index 29ae583fd..3849a72c7 100644 --- a/crates/input/Cargo.toml +++ b/crates/input/Cargo.toml @@ -26,6 +26,7 @@ mlb-hir.path = "../mlb-hir" mlb-syntax.path = "../mlb-syntax" slash-var-path.path = "../slash-var-path" sml-file-syntax.path = "../sml-file-syntax" +sml-file.path = "../sml-file" sml-fixity.path = "../sml-fixity" sml-namespace.path = "../sml-namespace" sml-path.path = "../sml-path" diff --git a/crates/input/src/lower_cm.rs b/crates/input/src/lower_cm.rs index 1a1619a75..b27c1d4f5 100644 --- a/crates/input/src/lower_cm.rs +++ b/crates/input/src/lower_cm.rs @@ -66,7 +66,7 @@ struct CmFile { /// only optional so this can derive default. pos_db: Option, cm_paths: Vec, - sml_paths: FxHashSet, + sml_paths: FxHashSet<(paths::PathId, sml_file::Kind)>, exports: NameExports, } @@ -125,7 +125,7 @@ fn get_one_cm_file( } }; match pp.val.kind() { - cm_syntax::PathKind::Sml => { + cm_syntax::PathKind::Sml(kind) => { let contents = match read_file(st.fs, source, path.as_path()) { Ok(x) => x, Err(e) => { @@ -134,7 +134,7 @@ fn get_one_cm_file( } }; st.sources.insert(path_id, contents); - ret.sml_paths.insert(path_id); + ret.sml_paths.insert((path_id, kind)); } cm_syntax::PathKind::Cm => { let cur = GroupPathToProcess { parent: cur_path_id, range: source.range, path: path_id }; @@ -179,7 +179,7 @@ fn get_one_cm_file( struct ExportCx<'a> { group: &'a StartedGroup, cm_paths: &'a [paths::PathId], - sml_paths: &'a FxHashSet, + sml_paths: &'a FxHashSet<(paths::PathId, sml_file::Kind)>, cur_path_id: paths::PathId, } @@ -273,7 +273,7 @@ fn get_all_sources(st: &mut St<'_, F>, cx: ExportCx<'_>, range: TextRange, ac where F: paths::FileSystem, { - for path_id in cx.sml_paths { + for (path_id, _) in cx.sml_paths { let contents = st.sources.get(path_id).expect("sml file should be set").as_str(); get_top_defs(contents, ac, range); } diff --git a/crates/input/src/lower_mlb.rs b/crates/input/src/lower_mlb.rs index ae799b5d2..482150c68 100644 --- a/crates/input/src/lower_mlb.rs +++ b/crates/input/src/lower_mlb.rs @@ -121,7 +121,7 @@ where } }; let kind = match pp.val.kind() { - mlb_syntax::PathKind::Sml => { + mlb_syntax::PathKind::Sml(file_kind) => { let contents = match read_file(st.fs, source, path.as_path()) { Ok(x) => x, Err(e) => { @@ -131,7 +131,7 @@ where } }; st.sources.insert(path_id, contents); - mlb_hir::PathKind::Source + mlb_hir::PathKind::Source(file_kind) } mlb_syntax::PathKind::Mlb => { st.stack.push(GroupPathToProcess { diff --git a/crates/input/src/topo.rs b/crates/input/src/topo.rs index 1f4cc0904..3084a1b71 100644 --- a/crates/input/src/topo.rs +++ b/crates/input/src/topo.rs @@ -24,7 +24,7 @@ fn bas_dec_paths(ac: &mut BTreeSet, dec: &mlb_hir::BasDec) { mlb_hir::BasDec::Path(p, _) => { ac.insert(*p); } - mlb_hir::BasDec::SourcePathSet(paths) => ac.extend(paths.iter().copied()), + mlb_hir::BasDec::SourcePathSet(paths) => ac.extend(paths.iter().map(|&(x, _)| x)), mlb_hir::BasDec::Basis(_, exp) => bas_exp_paths(ac, exp), mlb_hir::BasDec::Local(local_dec, in_dec) => { bas_dec_paths(ac, local_dec); diff --git a/crates/mlb-hir/Cargo.toml b/crates/mlb-hir/Cargo.toml index 038958ec8..ff1fec93b 100644 --- a/crates/mlb-hir/Cargo.toml +++ b/crates/mlb-hir/Cargo.toml @@ -15,4 +15,5 @@ paths.workspace = true str-util.workspace = true text-size-util.workspace = true +sml-file.path = "../sml-file" sml-namespace.path = "../sml-namespace" diff --git a/crates/mlb-hir/src/lib.rs b/crates/mlb-hir/src/lib.rs index 5f367e457..7307c362b 100644 --- a/crates/mlb-hir/src/lib.rs +++ b/crates/mlb-hir/src/lib.rs @@ -22,7 +22,7 @@ pub enum BasDec { /// A file path. Path(paths::PathId, PathKind), /// Used by CM only. - SourcePathSet(FxHashSet), + SourcePathSet(FxHashSet<(paths::PathId, sml_file::Kind)>), /// A sequence of declarations. Seq(Vec), } @@ -58,7 +58,7 @@ pub enum BasExp { #[derive(Debug, Clone, Copy)] pub enum PathKind { /// An SML source path. - Source, + Source(sml_file::Kind), /// A group path, like MLB or CM. Group, } diff --git a/crates/mlb-statics/Cargo.toml b/crates/mlb-statics/Cargo.toml index de6f2aaf9..11e3f53d6 100644 --- a/crates/mlb-statics/Cargo.toml +++ b/crates/mlb-statics/Cargo.toml @@ -22,6 +22,7 @@ config.path = "../config" mlb-hir.path = "../mlb-hir" sml-comment.path = "../sml-comment" sml-file-syntax.path = "../sml-file-syntax" +sml-file.path = "../sml-file" sml-fixity.path = "../sml-fixity" sml-hir-lower.path = "../sml-hir-lower" sml-hir.path = "../sml-hir" diff --git a/crates/mlb-statics/src/lib.rs b/crates/mlb-statics/src/lib.rs index 1fc2ebe5a..5b85f803e 100644 --- a/crates/mlb-statics/src/lib.rs +++ b/crates/mlb-statics/src/lib.rs @@ -245,10 +245,10 @@ fn get_bas_dec( } }, mlb_hir::BasDec::Path(path, kind) => match kind { - mlb_hir::PathKind::Source => { + mlb_hir::PathKind::Source(file_kind) => { let contents = cx.source_file_contents.get(path).expect("no source file"); let mut fix_env = scope.fix_env.clone(); - let syntax = SourceFileSyntax::new(&mut fix_env, cx.lang, contents); + let syntax = SourceFileSyntax::new(&mut fix_env, cx.lang, *file_kind, contents); get_source_file(st, cx.lang, *path, scope, ac, fix_env, syntax); } mlb_hir::PathKind::Group => match st.bases.get(path) { @@ -259,11 +259,11 @@ fn get_bas_dec( mlb_hir::BasDec::SourcePathSet(paths) => { let mut syntaxes: paths::PathMap<_> = paths .iter() - .map(|path| { - let contents = cx.source_file_contents.get(path).expect("no source file"); + .map(|&(path, kind)| { + let contents = cx.source_file_contents.get(&path).expect("no source file"); let mut fix_env = scope.fix_env.clone(); - let syntax = SourceFileSyntax::new(&mut fix_env, cx.lang, contents); - (*path, (fix_env, syntax)) + let syntax = SourceFileSyntax::new(&mut fix_env, cx.lang, kind, contents); + (path, (fix_env, syntax)) }) .collect(); let hir_roots: paths::PathMap<_> = syntaxes @@ -386,7 +386,7 @@ pub fn update_one( contents: &str, ) { let mut fix_env = sml_fixity::STD_BASIS.clone(); - sf.syntax = sml_file_syntax::SourceFileSyntax::new(&mut fix_env, lang, contents); + sf.syntax = sml_file_syntax::SourceFileSyntax::new(&mut fix_env, lang, sf.syntax.kind, contents); let mode = sml_statics_types::mode::Mode::Regular(Some(path)); let checked = sml_statics::get(syms_tys, &sf.scope, mode, &sf.syntax.lower.arenas, &sf.syntax.lower.root); diff --git a/crates/mlb-statics/src/std_basis.rs b/crates/mlb-statics/src/std_basis.rs index 08d40ef69..ce917558b 100644 --- a/crates/mlb-statics/src/std_basis.rs +++ b/crates/mlb-statics/src/std_basis.rs @@ -97,7 +97,9 @@ where contents = &owned_contents; } let mut fix_env = sml_fixity::STD_BASIS.clone(); - let started = SourceFileSyntax::new(&mut fix_env, &lang, contents); + let ext = std::path::Path::new(name).extension().unwrap().to_str().unwrap(); + let file_kind: sml_file::Kind = ext.parse().unwrap(); + let started = SourceFileSyntax::new(&mut fix_env, &lang, file_kind, contents); if let Some(e) = started.lex_errors.first() { panic!("{name}: lex error: {e}"); } diff --git a/crates/mlb-syntax/Cargo.toml b/crates/mlb-syntax/Cargo.toml index 1378ff804..1dd3852f9 100644 --- a/crates/mlb-syntax/Cargo.toml +++ b/crates/mlb-syntax/Cargo.toml @@ -16,4 +16,5 @@ text-size-util.workspace = true lex-util.path = "../lex-util" slash-var-path.path = "../slash-var-path" +sml-file.path = "../sml-file" sml-namespace.path = "../sml-namespace" diff --git a/crates/mlb-syntax/src/parse.rs b/crates/mlb-syntax/src/parse.rs index 851109000..902ebac46 100644 --- a/crates/mlb-syntax/src/parse.rs +++ b/crates/mlb-syntax/src/parse.rs @@ -171,11 +171,8 @@ fn bas_dec_one(p: &mut Parser<'_>) -> Result { } fn path_kind(path: &Path) -> Option { - let ret = match path.extension()?.to_str()? { - "sml" | "sig" | "fun" => PathKind::Sml, - "mlb" => PathKind::Mlb, - _ => return None, - }; + let ext = path.extension()?.to_str()?; + let ret = if ext == "mlb" { PathKind::Mlb } else { PathKind::Sml(ext.parse().ok()?) }; Some(ret) } diff --git a/crates/mlb-syntax/src/types.rs b/crates/mlb-syntax/src/types.rs index 9d47ec614..72e03ac06 100644 --- a/crates/mlb-syntax/src/types.rs +++ b/crates/mlb-syntax/src/types.rs @@ -131,7 +131,7 @@ pub enum BasExp { #[derive(Debug, Clone, Copy)] pub enum PathKind { /// SML paths. - Sml, + Sml(sml_file::Kind), /// ML Basis paths. Mlb, } diff --git a/crates/sml-dynamics-tests/Cargo.toml b/crates/sml-dynamics-tests/Cargo.toml index 5d46c6dad..75f61f7c3 100644 --- a/crates/sml-dynamics-tests/Cargo.toml +++ b/crates/sml-dynamics-tests/Cargo.toml @@ -15,6 +15,7 @@ str-util.workspace = true config.path = "../config" sml-dynamics.path = "../sml-dynamics" sml-file-syntax.path = "../sml-file-syntax" +sml-file.path = "../sml-file" sml-fixity.path = "../sml-fixity" sml-hir.path = "../sml-hir" sml-path.path = "../sml-path" diff --git a/crates/sml-dynamics-tests/src/lib.rs b/crates/sml-dynamics-tests/src/lib.rs index 64e5a7a09..2b868554d 100644 --- a/crates/sml-dynamics-tests/src/lib.rs +++ b/crates/sml-dynamics-tests/src/lib.rs @@ -19,7 +19,7 @@ fn check(s: &str, steps: &[&str]) { let check_steps = !env_var_enabled("MILLET_NO_CHECK_STEPS"); let mut fix_env = sml_fixity::STD_BASIS.clone(); let lang = config::lang::Language::default(); - let sf = sml_file_syntax::SourceFileSyntax::new(&mut fix_env, &lang, s); + let sf = sml_file_syntax::SourceFileSyntax::new(&mut fix_env, &lang, sml_file::Kind::Sml, s); if let Some(e) = sf.lex_errors.first() { panic!("lex error: {e}"); } diff --git a/crates/sml-file-syntax/Cargo.toml b/crates/sml-file-syntax/Cargo.toml index f38e4ec5f..bfc0597c5 100644 --- a/crates/sml-file-syntax/Cargo.toml +++ b/crates/sml-file-syntax/Cargo.toml @@ -14,6 +14,7 @@ elapsed.workspace = true text-pos.workspace = true config.path = "../config" +sml-file.path = "../sml-file" sml-fixity.path = "../sml-fixity" sml-hir-lower.path = "../sml-hir-lower" sml-lex.path = "../sml-lex" diff --git a/crates/sml-file-syntax/src/lib.rs b/crates/sml-file-syntax/src/lib.rs index 638dbfd8d..89af40c5a 100644 --- a/crates/sml-file-syntax/src/lib.rs +++ b/crates/sml-file-syntax/src/lib.rs @@ -13,16 +13,23 @@ pub struct SourceFileSyntax { pub parse: sml_parse::Parse, /// The lowered HIR. pub lower: sml_hir_lower::Lower, + /// The kind of source file this is. + pub kind: sml_file::Kind, } impl SourceFileSyntax { /// Starts processing a single source file. - pub fn new(fix_env: &mut sml_fixity::Env, lang: &config::lang::Language, contents: &str) -> Self { + pub fn new( + fix_env: &mut sml_fixity::Env, + lang: &config::lang::Language, + kind: sml_file::Kind, + contents: &str, + ) -> Self { elapsed::log("SourceFileSyntax::new", || { let (lex_errors, parse) = Self::lex_and_parse(fix_env, contents); - let mut lower = sml_hir_lower::get(lang, &parse.root); + let mut lower = sml_hir_lower::get(lang, kind, &parse.root); sml_ty_var_scope::get(&mut lower.arenas, &lower.root); - Self { pos_db: text_pos::PositionDb::new(contents), lex_errors, parse, lower } + Self { pos_db: text_pos::PositionDb::new(contents), lex_errors, parse, lower, kind } }) } diff --git a/crates/sml-file/Cargo.toml b/crates/sml-file/Cargo.toml new file mode 100644 index 000000000..9a9e2fdae --- /dev/null +++ b/crates/sml-file/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "sml-file" +version.workspace = true +edition.workspace = true +license.workspace = true +publish.workspace = true + +[lib] +doctest = false +test = false diff --git a/crates/sml-file/src/lib.rs b/crates/sml-file/src/lib.rs new file mode 100644 index 000000000..7a14c4650 --- /dev/null +++ b/crates/sml-file/src/lib.rs @@ -0,0 +1,39 @@ +//! Handling different kinds of SML files. + +#![deny(clippy::pedantic, missing_debug_implementations, missing_docs, rust_2018_idioms)] + +/// A kind of SML file. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Kind { + /// Regular SML files. + Sml, + /// Files that contain signature definitions (usually just one). + Sig, + /// Files that contain functor definitions (usually just one). + Fun, +} + +impl Kind { + /// Returns whether this is the `Sig` or `Fun` variants. + #[must_use] + pub fn is_sig_or_fun(&self) -> bool { + matches!(*self, Self::Sig | Self::Fun) + } +} +/// The error returned from `from_str` for [`Kind`]. +#[derive(Debug)] +pub struct KindFromStrError(()); + +impl std::str::FromStr for Kind { + type Err = KindFromStrError; + + fn from_str(s: &str) -> Result { + let ret = match s { + "sml" => Kind::Sml, + "sig" => Kind::Sig, + "fun" => Kind::Fun, + _ => return Err(KindFromStrError(())), + }; + Ok(ret) + } +} diff --git a/crates/sml-hir-lower/Cargo.toml b/crates/sml-hir-lower/Cargo.toml index f220a1dac..375bb1156 100644 --- a/crates/sml-hir-lower/Cargo.toml +++ b/crates/sml-hir-lower/Cargo.toml @@ -17,6 +17,7 @@ str-util.workspace = true config.path = "../config" cov-mark.path = "../cov-mark" lex-util.path = "../lex-util" +sml-file.path = "../sml-file" sml-hir.path = "../sml-hir" sml-path.path = "../sml-path" sml-syntax.path = "../sml-syntax" diff --git a/crates/sml-hir-lower/src/dec.rs b/crates/sml-hir-lower/src/dec.rs index ee5d903c0..4b0f241e5 100644 --- a/crates/sml-hir-lower/src/dec.rs +++ b/crates/sml-hir-lower/src/dec.rs @@ -38,12 +38,15 @@ pub(crate) fn get_top_dec(st: &mut St<'_>, root: &ast::Root) -> sml_hir::StrDecS } fn get_top_dec_one(st: &mut St<'_>, top_dec: ast::DecOne) -> sml_hir::StrDecIdx { - match top_dec { + let ret = match top_dec { ast::DecOne::ExpDec(top_dec) => { let ptr = SyntaxNodePtr::new(top_dec.syntax()); if !st.lang().dec.exp { st.err(top_dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("expression"))); } + if st.file_kind().is_sig_or_fun() { + st.err(top_dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } let bind = sml_hir::ValBind { rec: false, // the pat should technically be the variable pattern `it`, but allowing that would @@ -68,7 +71,9 @@ fn get_top_dec_one(st: &mut St<'_>, top_dec: ast::DecOne) -> sml_hir::StrDecIdx st.str_dec(sml_hir::StrDec::Dec(vec![dec]), ptr) } _ => get_str_dec_one(st, top_dec), - } + }; + st.mark_has_seen_top_level(); + ret } fn get_str_dec(st: &mut St<'_>, iter: I) -> sml_hir::StrDecSeq @@ -85,6 +90,9 @@ fn get_str_dec_one(st: &mut St<'_>, str_dec: ast::DecOne) -> sml_hir::StrDecIdx if !st.lang().dec.structure { st.err(str_dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`structure`"))); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(str_dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } st.inc_level(); let iter = str_dec.str_binds().filter_map(|str_bind| { let str_exp = str_bind.eq_str_exp().and_then(|x| x.str_exp()); @@ -104,6 +112,11 @@ fn get_str_dec_one(st: &mut St<'_>, str_dec: ast::DecOne) -> sml_hir::StrDecIdx if !st.lang().dec.signature { st.err(str_dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`signature`"))); } + if let (true, (true, sml_file::Kind::Sig) | (_, sml_file::Kind::Fun)) = + (st.is_top_level(), (st.seen_top_level(), st.file_kind())) + { + st.err(str_dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } let iter = str_dec.sig_binds().filter_map(|sig_bind| { Some(sml_hir::SigBind { name: get_name(sig_bind.name())?, @@ -116,6 +129,11 @@ fn get_str_dec_one(st: &mut St<'_>, str_dec: ast::DecOne) -> sml_hir::StrDecIdx if !st.lang().dec.functor { st.err(str_dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`functor`"))); } + if let (true, (true, sml_file::Kind::Fun) | (_, sml_file::Kind::Sig)) = + (st.is_top_level(), (st.seen_top_level(), st.file_kind())) + { + st.err(str_dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } st.inc_level(); let iter = str_dec.functor_binds().filter_map(|fun_bind| { let functor_name = get_name(fun_bind.functor_name())?; @@ -151,6 +169,9 @@ fn get_str_dec_one(st: &mut St<'_>, str_dec: ast::DecOne) -> sml_hir::StrDecIdx if !st.lang().dec.local { st.err(str_dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`local`"))); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(str_dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } st.inc_level(); let fst = get_str_dec(st, str_dec.local_dec_hd().into_iter().flat_map(|x| x.decs())); st.dec_level(); @@ -496,6 +517,9 @@ fn get_one(st: &mut St<'_>, dec: ast::DecOne) -> Option { if !st.lang().dec.val { st.err(dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`val`"))); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } let ty_vars = ty::var_seq(st, dec.ty_var_seq()); let iter = dec.val_binds().map(|val_bind| { let exp = val_bind.eq_exp().and_then(|x| x.exp()); @@ -514,6 +538,9 @@ fn get_one(st: &mut St<'_>, dec: ast::DecOne) -> Option { if !st.lang().dec.fun { st.err(dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`fun`"))); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } let ty_vars = ty::var_seq(st, dec.ty_var_seq()); let iter = dec.fun_binds().map(|fun_bind| { if let (false, Some(bar)) = (st.lang().successor_ml.opt_bar, fun_bind.bar()) { @@ -626,6 +653,9 @@ fn get_one(st: &mut St<'_>, dec: ast::DecOne) -> Option { if !st.lang().dec.type_ { st.err(dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`type`"))); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } let hd = dec.ty_head()?; match hd.kind { ast::TyHeadKind::TypeKw => {} @@ -637,6 +667,9 @@ fn get_one(st: &mut St<'_>, dec: ast::DecOne) -> Option { if !st.lang().dec.datatype { st.err(dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`datatype`"))); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } let d_binds = dat_binds(st, dec.dat_binds()); let t_binds = ty_binds(st, dec.with_type().into_iter().flat_map(|x| x.ty_binds())); sml_hir::Dec::Datatype(d_binds, t_binds) @@ -645,9 +678,15 @@ fn get_one(st: &mut St<'_>, dec: ast::DecOne) -> Option { if !st.lang().dec.datatype_copy { st.err(dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`datatype` copy"))); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } sml_hir::Dec::DatatypeCopy(get_name(dec.name())?, get_path(dec.path()?)?) } ast::DecOne::AbstypeDec(dec) => { + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } let d_binds = dat_binds(st, dec.dat_binds()); let t_binds = ty_binds(st, dec.with_type().into_iter().flat_map(|x| x.ty_binds())); let inner = get(st, dec.decs()); @@ -657,6 +696,9 @@ fn get_one(st: &mut St<'_>, dec: ast::DecOne) -> Option { if !st.lang().dec.exception { st.err(dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`exception`"))); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } let iter = dec.ex_binds().filter_map(|ex_bind| { let name = str_util::Name::new(ex_bind.name_star_eq()?.token.text()); let ret = match ex_bind.ex_bind_inner() { @@ -690,7 +732,8 @@ fn get_one(st: &mut St<'_>, dec: ast::DecOne) -> Option { ast::DecOne::OpenDec(dec) => { if !st.lang().dec.open { st.err(dec.syntax(), ErrorKind::Disallowed(Disallowed::Dec("`open`"))); - } else if st.is_top_level() { + } + if st.is_top_level() { st.err(dec.syntax(), ErrorKind::TopLevelOpen); } let paths: Vec<_> = dec.paths().filter_map(get_path).collect(); @@ -704,12 +747,18 @@ fn get_one(st: &mut St<'_>, dec: ast::DecOne) -> Option { let e = ErrorKind::Disallowed(Disallowed::Dec("`infix`, `infixr`, or `nonfix`")); st.err(dec.syntax(), e); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } return None; } ast::DecOne::DoDec(ref inner) => { if !st.lang().successor_ml.do_dec { st.err(dec.syntax(), ErrorKind::Disallowed(Disallowed::SuccMl("`do` declarations"))); } + if st.is_top_level() && st.file_kind().is_sig_or_fun() { + st.err(dec.syntax(), ErrorKind::DiscouragedTopDecKindForSigFun); + } let bind = sml_hir::ValBind { rec: false, pat: st.pat(pat::tuple([]), ptr.clone()), diff --git a/crates/sml-hir-lower/src/root.rs b/crates/sml-hir-lower/src/root.rs index baae1e18c..284c0f7bb 100644 --- a/crates/sml-hir-lower/src/root.rs +++ b/crates/sml-hir-lower/src/root.rs @@ -5,8 +5,8 @@ use sml_syntax::ast; /// Does the conversion. #[must_use] -pub fn get(lang: &config::lang::Language, root: &ast::Root) -> Lower { - let mut st = St::new(lang); +pub fn get(lang: &config::lang::Language, file_kind: sml_file::Kind, root: &ast::Root) -> Lower { + let mut st = St::new(lang, file_kind); let idx = crate::dec::get_top_dec(&mut st, root); st.finish(idx) } diff --git a/crates/sml-hir-lower/src/util.rs b/crates/sml-hir-lower/src/util.rs index 4a3af2a34..25d60f0c1 100644 --- a/crates/sml-hir-lower/src/util.rs +++ b/crates/sml-hir-lower/src/util.rs @@ -80,6 +80,7 @@ pub(crate) enum ErrorKind { EmptyLocal, EmptyRecordPatOrExp, EmptyRecordTy, + DiscouragedTopDecKindForSigFun, } impl fmt::Display for Error { @@ -143,6 +144,9 @@ impl fmt::Display for Error { ErrorKind::EmptyRecordTy => { f.write_str("the empty record/tuple type is usually written as `unit`") } + ErrorKind::DiscouragedTopDecKindForSigFun => f.write_str( + "`.sig` and `.fun` files usually contain exactly one `signature` or `functor` respectively", + ), } } } @@ -268,6 +272,7 @@ impl Error { ErrorKind::EmptyLocal => Code::n(4034), ErrorKind::EmptyRecordPatOrExp => Code::n(4035), ErrorKind::EmptyRecordTy => Code::n(4036), + ErrorKind::DiscouragedTopDecKindForSigFun => Code::n(4037), } } @@ -285,7 +290,8 @@ impl Error { | ErrorKind::EmptyLet | ErrorKind::EmptyLocal | ErrorKind::EmptyRecordPatOrExp - | ErrorKind::EmptyRecordTy => Severity::Warning, + | ErrorKind::EmptyRecordTy + | ErrorKind::DiscouragedTopDecKindForSigFun => Severity::Warning, _ => Severity::Error, } } @@ -312,12 +318,14 @@ pub(crate) struct St<'a> { ptrs: Ptrs, fun_names: Vec, lang: &'a Language, + file_kind: sml_file::Kind, level: usize, + seen_top_level: bool, } #[allow(clippy::unnecessary_wraps)] impl<'a> St<'a> { - pub(crate) fn new(lang: &'a Language) -> St<'a> { + pub(crate) fn new(lang: &'a Language, file_kind: sml_file::Kind) -> St<'a> { St { fresh_idx: 0, errors: Vec::new(), @@ -325,7 +333,9 @@ impl<'a> St<'a> { ptrs: Ptrs::default(), fun_names: Vec::new(), lang, + file_kind, level: 0, + seen_top_level: false, } } @@ -425,6 +435,10 @@ impl<'a> St<'a> { self.lang } + pub(crate) fn file_kind(&self) -> sml_file::Kind { + self.file_kind + } + pub(crate) fn inc_level(&mut self) { self.level += 1; } @@ -436,6 +450,15 @@ impl<'a> St<'a> { pub(crate) fn is_top_level(&self) -> bool { self.level == 0 } + + pub(crate) fn mark_has_seen_top_level(&mut self) { + self.seen_top_level = true; + } + + /// returns whether we have already seen at least one top level dec + pub(crate) fn seen_top_level(&self) -> bool { + self.seen_top_level + } } fn pats_text_range(case: &sml_syntax::ast::FunBindCase) -> Option { diff --git a/crates/tests/Cargo.toml b/crates/tests/Cargo.toml index 0b67ca1b6..ea19403fa 100644 --- a/crates/tests/Cargo.toml +++ b/crates/tests/Cargo.toml @@ -28,4 +28,5 @@ env_logger = "0.10" input.path = "../input" mlb-syntax.path = "../mlb-syntax" slash-var-path.path = "../slash-var-path" +sml-file.path = "../sml-file" sml-syntax.path = "../sml-syntax" diff --git a/crates/tests/src/check.rs b/crates/tests/src/check.rs index 9ae4615af..e4f1019f6 100644 --- a/crates/tests/src/check.rs +++ b/crates/tests/src/check.rs @@ -42,7 +42,7 @@ pub(crate) fn check(s: &str) { check_multi(raw::one_file_fs(s)); } -/// Like [`check`], but allows multiple files. +/// Like [`check`], but allows multiple files, provided as pairs of (filename, contents). #[track_caller] pub(crate) fn check_multi(files: [(&str, &str); N]) { let opts = raw::Opts { diff --git a/crates/tests/src/docs.rs b/crates/tests/src/docs.rs index 44fc55d6b..bd95fda1c 100644 --- a/crates/tests/src/docs.rs +++ b/crates/tests/src/docs.rs @@ -4,7 +4,7 @@ use crate::{check::raw, repo::root_dir}; use diagnostic::Severity; use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag}; -const SML: &str = "sml"; +const LANG_NAME: &str = "sml"; fn opts(limit: raw::Limit) -> raw::Opts<'static> { raw::Opts { @@ -27,12 +27,12 @@ fn check_all(contents: &str) { for ev in parser { match ev { Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang))) => { - if lang.as_ref() == SML { + if lang.as_ref() == LANG_NAME { inside = true; } } Event::End(Tag::CodeBlock(CodeBlockKind::Fenced(lang))) => { - if lang.as_ref() == SML { + if lang.as_ref() == LANG_NAME { if !ignore_next { raw::get(raw::one_file_fs(ac.as_ref()), opts(limit)); } diff --git a/crates/tests/src/input/cm/syntax.rs b/crates/tests/src/input/cm/syntax.rs index b0739badd..1b73722f9 100644 --- a/crates/tests/src/input/cm/syntax.rs +++ b/crates/tests/src/input/cm/syntax.rs @@ -66,10 +66,13 @@ Group is (* uh.sml *) - support.sml + support.fun "#, vec![], - &[("hi.sml", PathKind::Sml), ("support.sml", PathKind::Sml)], + &[ + ("hi.sml", PathKind::Sml(sml_file::Kind::Sml)), + ("support.fun", PathKind::Sml(sml_file::Kind::Fun)), + ], ); } @@ -95,12 +98,12 @@ is mk_name(Namespace::Signature, "C"), ], &[ - ("a.sml", PathKind::Sml), - ("b/c/d.sml", PathKind::Sml), - ("e.fun", PathKind::Sml), + ("a.sml", PathKind::Sml(sml_file::Kind::Sml)), + ("b/c/d.sml", PathKind::Sml(sml_file::Kind::Sml)), + ("e.fun", PathKind::Sml(sml_file::Kind::Fun)), ("seq.cm", PathKind::Cm), - ("f.sig", PathKind::Sml), - ("uh", PathKind::Sml), + ("f.sig", PathKind::Sml(sml_file::Kind::Sig)), + ("uh", PathKind::Sml(sml_file::Kind::Sml)), ], ); } @@ -123,7 +126,11 @@ is mk_library("quz/baz.cm"), mk_name(Namespace::Signature, "BAR"), ], - &[("Foo.sml", PathKind::Sml), ("Bar/sources.cm", PathKind::Cm), ("quz/baz.cm", PathKind::Cm)], + &[ + ("Foo.sml", PathKind::Sml(sml_file::Kind::Sml)), + ("Bar/sources.cm", PathKind::Cm), + ("quz/baz.cm", PathKind::Cm), + ], ); } diff --git a/crates/tests/src/lib.rs b/crates/tests/src/lib.rs index ea63bf034..5c1e394e4 100644 --- a/crates/tests/src/lib.rs +++ b/crates/tests/src/lib.rs @@ -44,6 +44,7 @@ mod rust; mod sep; mod shadow; mod sig; +mod sig_fun_file; mod smoke; mod std_basis; mod symbolic; diff --git a/crates/tests/src/sig_fun_file.rs b/crates/tests/src/sig_fun_file.rs new file mode 100644 index 000000000..a7bff2cbb --- /dev/null +++ b/crates/tests/src/sig_fun_file.rs @@ -0,0 +1,140 @@ +//! Special file kinds which only allow certain kinds of declarations. + +use crate::check::raw; + +const OPTS: raw::Opts<'_> = raw::Opts { + std_basis: raw::StdBasis::Minimal, + outcome: raw::Outcome::Pass, + limit: raw::Limit::First, + min_severity: diagnostic::Severity::Warning, + expected_input: raw::ExpectedInput::Good, +}; + +const CONFIG: &str = r#" +version = 1 +language.successor-ml.do-dec = true +"#; + +#[track_caller] +fn check_sig(s: &str) { + raw::get([(config::file::PATH, CONFIG), ("a.mlb", "a.sig"), ("a.sig", s)], OPTS); +} + +#[track_caller] +fn check_fun(s: &str) { + raw::get([(config::file::PATH, CONFIG), ("a.mlb", "a.fun"), ("a.fun", s)], OPTS); +} + +#[test] +fn sig_zero() { + // TODO this should fail, since it contains 0 (not 1) + check_sig(""); +} + +#[test] +fn fun_zero() { + // TODO this should fail, since it contains 0 (not 1) + check_fun(""); +} + +#[test] +fn sig_one() { + check_sig( + r#" +signature S = sig + type t + val x: t + structure S: sig + type a + val z: a + end +end +"#, + ); +} + +#[test] +fn fun_one() { + check_fun( + r#" +functor F() = struct + val x = 3 + fun f() = () + type a = int + datatype b = D + datatype c = datatype int + exception E + nonfix f + do f() + local val z = 3 in val a = z + 1 end + structure S = struct val a = x end +end +"#, + ); +} + +#[test] +fn sig_two() { + check_sig( + r#" +signature S1 = sig end +signature S2 = sig end +(** + files usually contain exactly one *) +"#, + ); +} + +#[test] +fn fun_two() { + check_fun( + r#" +functor F1() = struct end +functor F2() = struct end +(** + files usually contain exactly one *) +"#, + ); +} + +#[test] +fn sig_in_fun() { + check_fun( + r#" +signature S = sig end +(** + files usually contain exactly one *) +"#, + ); +} + +#[test] +fn fun_in_sig() { + check_sig( + r#" +functor F() = struct end +(** + files usually contain exactly one *) +"#, + ); +} + +#[test] +fn many_dec_kinds() { + let decs = [ + "val x = 3", + "fun f() = 3", + "type t = int", + "datatype t = D", + "datatype t = datatype int", + "exception E", + "infix +", + "do ((fn () => ()) ())", + "structure S = struct end", + "local val x = 3 in signature S = sig end end", + "local val x = 3 in functor F() = struct end end", + ]; + for dec in decs { + let mut contents = dec.to_owned(); + contents.push('\n'); + contents.push_str("(** + files usually contain exactly one *)"); + check_fun(&contents); + check_sig(&contents); + } +} diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 39f0f43b9..8ad84faae 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -253,6 +253,10 @@ Because the semantics for MLB files determines when source (SML) files get parse These crates depend on many other crates to "unite" everything together. +### `crates/sml-file` + +Kinds of source files (like signature and functor files). + ### `crates/sml-file-syntax` Unites all of the purely syntactical SML passes into one single API. diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 3be7ae338..e8a247251 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -13,6 +13,11 @@ The versioning system is basically the following: - If there's a really "big" change. - As mentioned, the "major" version is 0. +## main + +- Warn when a `.sig` file doesn't contain 1 `signature`. +- Warn when a `.fun` file doesn't contain 1 `functor`. + ## v0.13.5 - Tweak inlay hint parentheses around `as` patterns. diff --git a/docs/diagnostics/4037.md b/docs/diagnostics/4037.md new file mode 100644 index 000000000..12b1c4331 --- /dev/null +++ b/docs/diagnostics/4037.md @@ -0,0 +1,8 @@ +# 4037 + +A `.sig` file didn't contain exactly one `signature`, or a `.fun` file didn't contain exactly one `functor`. + +## To fix + +- Make it so the file defines only one of the expected type of top declaration. +- Rename the file to have a generic `.sml` suffix.