diff --git a/Cargo.toml b/Cargo.toml index dc73003..51b580f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,5 +50,7 @@ subprocess = "1.0" thiserror = "2.0" urlencoding = "2.1" utf8-read = "0.4" +scip = "0.6.1" +protobuf = "3" walkdir = "2" heck = "0.5" diff --git a/USAGE.md b/USAGE.md index ab1d23a..ac945e7 100644 --- a/USAGE.md +++ b/USAGE.md @@ -117,3 +117,133 @@ method now_we_can_declare_this_method_with_a_really_really_really_really_long na ``` Will allow 'long_lines' globally, 'nsp_unary' and 'indent_no_tabs' on the `param p = (1 ++ *` line, and 'indent_paren_expr' on the `'4);` line. + +## SCIP Export + +The DLS can export a [SCIP index](https://sourcegraph.com/docs/code-search/code-navigation/scip) +of analyzed DML devices. SCIP (Source Code Intelligence Protocol) is a +language-agnostic format for code intelligence data, used by tools such as +Sourcegraph for cross-repository navigation and code search. + +### Invocation + +SCIP export is available through the DFA (DML File Analyzer) binary via the +`--scip-output <path>` flag: +``` +dfa --compile-info <compile-info-file> --workspace <workspace-root> --scip-output <path> [list of devices to analyze, ...] +``` + +Note that the SCIP format specifies that symbols from documents that are not under the project root (which we define as the workspace) are listed as external symbols, with no occurrences tracked. + +### SCIP schema details +Here we list how we have mapped DML specifically to the SCIP format. 
+ +#### SCIP symbol kind mappings + +DML symbol kinds are mapped to SCIP `SymbolInformation.Kind` as follows: + +- `Constant` — Parameter, Constant, Loggroup +- `Variable` — Extern, Saved, Session, Local +- `Parameter` — MethodArg +- `Event` — Hook +- `Method` — Method +- `Class` — Template +- `TypeAlias` — Typedef +- `Object` — All composite objects (Device, Bank, Register, Field, Group, Port, Connect, Attribute, Event, Subdevice, Implement) +- `Interface` — Interface + +Note: SCIP's `Object` kind is used for DML composite objects because they are +instantiated structural components in the device hierarchy, not types or +namespaces. `Event` is used for DML hooks because they represent named event +points that can be sent or listened to. + +Since SCIP's `Kind` enum is too coarse to distinguish between the various DML +composite object kinds (e.g. `register` vs `bank` vs `attribute`), the +`SymbolInformation.documentation` field carries a short-form declaration +signature that disambiguates: + +- **Composite objects:** the DML keyword for the object kind, e.g. `register`, + `bank`, `attribute`, `group`, `field`, `device`, etc. +- **Methods:** the DML declaration modifiers, e.g. `method`, + `independent method default`, `shared method throws`. +- **Other symbol kinds:** no documentation is emitted. + +#### Symbol Naming Scheme + +SCIP symbols follow the format: +`<scheme> ' ' <manager> ' ' <package> ' ' <version> ' ' <descriptors>` + +For DML, the scheme is `dml`, the manager is `simics`, the version is `.` (currently we cannot extract the Simics version here), and the +package is the device name. Descriptors are built from the fully qualified path +through the device hierarchy: + +``` +dml simics sample_device . sample_device.regs.r1.offset. + ^ term (parameter) +dml simics sample_device . sample_device.regs.r1.read(). + ^ method +dml simics sample_device . 
bank# + ^ 'type' (template) +``` + +Descriptor suffixes follow the SCIP standard: +- `.` (term) — used for composite objects, parameters, and other named values +- `#` (type) — used only for templates +- `().` (method) — used for methods + +#### Local Symbols + +Method arguments and method-local variables use SCIP local symbols of the form +`local <name>_<id>`, where `<name>` is the sanitized symbol name and `<id>` is the internal symbol identifier. Local +symbols are scoped to a single document and are not navigable across files. + +#### Occurrence Roles + +DML definitions (including the primary symbol location) are emitted with the +SCIP `Definition` role. Declarations that also appear as definitions share +this role. Declarations that do _not_ define a value (e.g. abstract method +declarations, or `default` parameter declarations that are overridden) are +emitted with the `ForwardDefinition` role. + +References (including template instantiation sites from `is` statements) are +emitted as plain references with no additional role flags. Access-kind +refinement (`ReadAccess` / `WriteAccess`) is not yet tracked. + +#### Enclosing Ranges + +For composite object definitions and method declarations, each `Definition` +or `ForwardDefinition` occurrence includes an `enclosing_range` that spans +the full AST node (e.g. the complete `register r1 is ... { ... }` block or +the full method body). This allows consumers to associate the definition site +with the extent of the construct it names. + +#### Deduplication and Determinism + +When multiple device analyses share source files (e.g. common library code), +the SCIP export deduplicates occurrences and symbol information so that each +(symbol, range, role) triple and each symbol entry appears at most once. +All output is sorted deterministically: documents by relative path, +occurrences by range, symbols by symbol string, and relationships by symbol. 
+
+#### Relationships
+
+Composite objects that instantiate templates (via `is some_template`) emit
+SCIP `Relationship` entries with `is_implementation = true` pointing to the
+template symbol.
+
+#### File Symbols and Imports
+
+Each source file involved in the analysis gets a dedicated SCIP symbol of kind
+`File`. A `Definition` occurrence is emitted at line 0 of each file so that
+navigation to the file symbol opens the file itself.
+
+For each `import "..."` statement, an `Import` occurrence is emitted at the
+import statement's span, referencing the imported file's symbol. This lets
+consumers navigate from import statements to the imported file and visualize
+file-level dependency graphs.
+
+File symbols use the format:
+```
+dml simics . . path.to.file_dml.
+```
+where each path segment becomes one term descriptor (suffixed with `.`) and
+every character outside `[a-zA-Z0-9_]` is replaced by `_`.
diff --git a/src/actions/requests.rs b/src/actions/requests.rs
index 2d8c252..1268689 100644
--- a/src/actions/requests.rs
+++ b/src/actions/requests.rs
@@ -959,6 +959,153 @@ impl RequestAction for GetKnownContextsRequest {
     }
 }
 
+// ---- SCIP Export Request ----
+
+/// Marker type for the custom `$/exportScip` request.
+#[derive(Debug, Clone)]
+pub struct ExportScipRequest;
+
+/// Parameters of the `$/exportScip` request.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ExportScipParams {
+    /// Device paths to export SCIP for. If `None` or empty, all known
+    /// devices are exported.
+    // NOTE(review): the type arguments of this field were lost in
+    // extraction (the text reads `Option>`). Each entry is handed to
+    // `parse_file_path!` in `handle`; restore the type from the real patch.
+    pub devices: Option>,
+    /// The file path where the SCIP index should be written.
+    pub output_path: String,
+}
+
+/// Outcome of a `$/exportScip` request.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ExportScipResult {
+    /// Whether the export succeeded.
+    pub success: bool,
+    /// Number of documents in the exported index.
+    pub document_count: usize,
+    /// Error message, if any.
+    pub error: Option<String>,
+}
+
+impl LSPRequest for ExportScipRequest {
+    type Params = ExportScipParams;
+    type Result = ExportScipResult;
+
+    const METHOD: &'static str = "$/exportScip";
+}
+
+// NOTE(review): the type arguments of the `Result` return types below (and
+// any generic parameters on `handle`) were lost in extraction and are left
+// exactly as they appear in the extracted text; restore them from the real
+// patch.
+impl RequestAction for ExportScipRequest {
+    type Response = ExportScipResult;
+
+    /// Exporting may have to wait for full device analyses, so the request
+    /// gets 30 times the default timeout.
+    fn timeout() -> std::time::Duration {
+        crate::server::dispatch::DEFAULT_REQUEST_TIMEOUT * 30
+    }
+
+    /// Response sent when the request times out: a failed export.
+    fn fallback_response() -> Result {
+        Ok(ExportScipResult {
+            success: false,
+            document_count: 0,
+            error: Some("Request timed out".to_string()),
+        })
+    }
+
+    fn get_identifier(params: &Self::Params) -> String {
+        Self::request_identifier(&params.output_path)
+    }
+
+    /// Build a SCIP index for the requested devices (or all known devices)
+    /// and write it to `params.output_path`.
+    ///
+    /// Failures are reported through `ExportScipResult::error` with
+    /// `success == false`; the request itself still returns `Ok`.
+    fn handle(
+        ctx: InitActionContext,
+        params: Self::Params,
+    ) -> Result {
+        info!("Handling SCIP export request to {}", params.output_path);
+
+        // Determine which device paths to export
+        let requested = params.devices.as_ref().map_or(0, |d| d.len());
+        let device_paths: Vec<CanonPath> =
+            if let Some(devices) = &params.devices {
+                devices.iter().filter_map(
+                    |uri| parse_file_path!(&uri, "ExportScip")
+                        .ok()
+                        .and_then(CanonPath::from_path_buf))
+                    .collect()
+            } else {
+                vec![]
+            };
+
+        // An empty path list means "export everything". If devices were
+        // explicitly requested but none of them resolved to a path, fail
+        // instead of silently exporting every known device.
+        if requested > 0 && device_paths.is_empty() {
+            return Ok(ExportScipResult {
+                success: false,
+                document_count: 0,
+                error: Some("None of the requested devices could be \
+                             resolved to a file path".to_string()),
+            });
+        }
+
+        // Wait for device analyses to be ready. The wait is best-effort:
+        // its result is deliberately ignored and a missing analysis is
+        // reported by the emptiness check below.
+        if !device_paths.is_empty() {
+            ctx.wait_for_state(
+                AnalysisProgressKind::Device,
+                AnalysisWaitKind::Work,
+                AnalysisCoverageSpec::Paths(device_paths.clone())).ok();
+        } else {
+            ctx.wait_for_state(
+                AnalysisProgressKind::Device,
+                AnalysisWaitKind::Work,
+                AnalysisCoverageSpec::All).ok();
+        }
+
+        let analysis = ctx.analysis.lock().unwrap();
+
+        // Collect device analyses
+        let devices: Vec<&crate::analysis::DeviceAnalysis> =
+            if device_paths.is_empty() {
+                // Export all device analyses
+                analysis.device_analysis.values()
+                    .map(|ts| &ts.stored)
+                    .collect()
+            } else {
+                device_paths.iter().filter_map(|path| {
+                    analysis.get_device_analysis(path).ok()
+                }).collect()
+            };
+
+        if devices.is_empty() {
+            return Ok(ExportScipResult {
+                success: false,
+                document_count: 0,
+                error: Some("No device analyses found".to_string()),
+            });
+        }
+
+        info!("Exporting SCIP for {} device(s)", devices.len());
+
+        // Extract import resolution data for the SCIP export
+        let import_data = crate::scip::extract_import_data(
+            &analysis.isolated_analysis,
+            &analysis.import_map,
+            &devices,
+        );
+
+        // Determine project root from workspaces; fall back to the current
+        // directory when no workspace root is known or parsable.
+        let project_root = ctx.workspace_roots
+            .lock()
+            .unwrap()
+            .first()
+            .and_then(|ws| parse_file_path!(&ws.uri, "ExportScip").ok())
+            .unwrap_or_else(|| std::path::PathBuf::from("."));
+
+        let index = crate::scip::build_scip_index(&devices, &project_root,
+                                                  &import_data);
+        let doc_count = index.documents.len();
+
+        let output = std::path::Path::new(&params.output_path);
+        match crate::scip::write_scip_to_file(index, output) {
+            Ok(()) => {
+                info!("SCIP export complete: {} documents written to {}",
+                      doc_count, params.output_path);
+                Ok(ExportScipResult {
+                    success: true,
+                    document_count: doc_count,
+                    error: None,
+                })
+            },
+            Err(e) => {
+                error!("SCIP export failed: {}", e);
+                Ok(ExportScipResult {
+                    success: false,
+                    document_count: 0,
+                    error: Some(e),
+                })
+            }
+        }
+    }
+}
+
 /// Server-to-client requests
 impl SentRequest for RegisterCapability {
     type Response = ::Result;
diff --git a/src/cmd.rs b/src/cmd.rs
index 0bf0c9d..f297323 100644
--- a/src/cmd.rs
+++ b/src/cmd.rs
@@ -322,6 +322,24 @@ pub fn set_contexts(paths: Vec) -> Notification
+// NOTE(review): the extraction swallowed this hunk's three leading context
+// lines together with the original signature line. The signature below is
+// reconstructed from the body and from the call in src/dfa/client.rs;
+// confirm it against the real patch.
+/// Build an `$/exportScip` request. An empty `devices` list is sent as
+/// `None`, which the server treats as "export all known devices".
+pub fn export_scip(devices: Vec<String>, output_path: String)
+                   -> Request<requests::ExportScipRequest> {
+    Request {
+        params: requests::ExportScipParams {
+            devices: if devices.is_empty() {
+                None
+            } else {
+                Some(devices.into_iter()
+                     .map(|p| parse_uri(&p).unwrap())
+                     .collect())
+            },
+            output_path,
+        },
+        action: PhantomData,
+        id: next_id(),
+        received: Instant::now(),
+    }
+}
+
 fn next_id() -> RequestId {
     static ID: AtomicU64 = AtomicU64::new(1);
     RequestId::Num(ID.fetch_add(1, Ordering::SeqCst))
diff --git a/src/dfa/client.rs b/src/dfa/client.rs
index 054eab5..ca6117e 100644
--- a/src/dfa/client.rs
+++ b/src/dfa/client.rs
@@ -412,4 +412,37 @@ impl ClientInterface {
         self.server.wait_timeout(Duration::from_millis(1000))?;
         Ok(())
     }
+
+    /// Send an `$/exportScip` request for `device_paths` and wait for the
+    /// server's response, skipping any other messages received meanwhile.
+    pub fn export_scip(&mut self,
+                       device_paths: Vec<String>,
+                       output_path: String)
+                       -> anyhow::Result<crate::actions::requests::ExportScipResult> {
+        debug!("Sending SCIP export request for {:?} -> {}", device_paths, output_path);
+        self.send(
+            cmd::export_scip(device_paths, output_path).to_string()
+        )?;
+        // Wait for the response
+        // NOTE(review): every `Err` from `receive_maybe` is retried without
+        // bound. If that call can fail persistently (e.g. the channel to the
+        // server is closed), this loop never terminates — confirm.
+        loop {
+            match self.receive_maybe() {
+                Ok(ServerMessage::Response(value)) => {
+                    let result: crate::actions::requests::ExportScipResult
+                        = serde_json::from_value(value)
+                        .map_err(|e| RpcErrorKind::from(e.to_string()))?;
+                    return Ok(result);
+                },
+                Ok(ServerMessage::Error(e)) => {
+                    return Err(anyhow::anyhow!(
+                        "Server exited during SCIP export: {:?}", e));
+                },
+                Ok(_) => {
+                    // Skip other messages (diagnostics, progress, etc.)
+                    continue;
+                },
+                Err(e) => {
+                    trace!("Skipping message during SCIP export wait: {:?}", e);
+                    continue;
+                }
+            }
+        }
+    }
 }
diff --git a/src/dfa/main.rs b/src/dfa/main.rs
index c5e6032..4760f8d 100644
--- a/src/dfa/main.rs
+++ b/src/dfa/main.rs
@@ -37,6 +37,7 @@ struct Args {
     lint_cfg_path: Option,
     test: bool,
     quiet: bool,
+    scip_output: Option<PathBuf>,
 }
 
 fn parse_args() -> Args {
@@ -92,6 +93,11 @@ fn parse_args() -> Args {
              .action(ArgAction::Set)
              .value_parser(clap::value_parser!(PathBuf))
              .required(false))
+        .arg(Arg::new("scip-output").long("scip-output")
+             .help("Export SCIP index to the specified file after analysis")
+             .action(ArgAction::Set)
+             .value_parser(clap::value_parser!(PathBuf))
+             .required(false))
         .arg(arg!( ... "DML files to analyze")
              .value_parser(clap::value_parser!(PathBuf)))
         .arg_required_else_help(false)
@@ -115,7 +121,9 @@ fn parse_args() -> Args {
         linting_enabled: args.get_one::("linting-enabled")
             .cloned(),
         lint_cfg_path: args.get_one::("lint-cfg-path")
-            .cloned()
+            .cloned(),
+        scip_output: args.get_one::<PathBuf>("scip-output")
+            .cloned(),
     }
 }
 
@@ -177,6 +185,33 @@ fn main_inner() -> Result<(), i32> {
         if arg.test && !dlsclient.no_errors() {
             exit_code = Err(1);
         }
+
+        // Export SCIP if requested
+        if let Some(scip_path) = &arg.scip_output {
+            println!("Exporting SCIP index to {:?}", scip_path);
+            let scip_output_str = scip_path.to_string_lossy().to_string();
+            // NOTE(review): files that fail to canonicalize are dropped
+            // silently; if every file is dropped the list is empty and the
+            // request exports all known devices — confirm this is intended.
+            let device_paths: Vec<String> = arg.files.iter()
+                .filter_map(|f| f.canonicalize().ok())
+                .map(|p| p.to_string_lossy().to_string())
+                .collect();
+            match dlsclient.export_scip(device_paths, scip_output_str) {
+                Ok(result) => {
+                    if result.success {
+                        println!("SCIP export complete: {} document(s) written",
+                                 result.document_count);
+                    } else {
+                        let err_msg = result.error.unwrap_or_else(
+                            || "Unknown error".to_string());
+                        eprintln!("SCIP export failed: {}", err_msg);
+                        exit_code = Err(1);
+                    }
+                },
+                Err(e) => {
+                    eprintln!("SCIP export request failed: {}", e);
+                    exit_code = Err(1);
+                }
+            }
+        }
     }
 
     // Disregard this result, we dont _really_ care about shutting down
diff --git a/src/lib.rs b/src/lib.rs
index 497835b..e6d7c8d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -38,6 +38,7 @@ pub mod dfa;
 pub mod file_management;
 pub mod lint;
 pub mod lsp_data;
+pub mod scip;
 pub mod server;
 pub mod span;
 pub mod utility;
diff --git a/src/scip/mod.rs b/src/scip/mod.rs
new file mode 100644
index 0000000..bbbedd3
--- /dev/null
+++ b/src/scip/mod.rs
@@ -0,0 +1,722 @@
+// © 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0 and MIT
+//! SCIP (Source Code Intelligence Protocol) export support.
+//!
+//! This module converts DLS analysis data (DeviceAnalysis) into
+//! the SCIP index format for use with code intelligence tools.
+
+use std::collections::{HashMap, HashSet};
+use std::path::{Path, PathBuf};
+
+use protobuf::MessageField;
+use protobuf::Enum;
+
+use scip::types::{
+    Document, Index, Metadata, Occurrence, PositionEncoding,
+    Relationship, SymbolInformation, SymbolRole, ToolInfo,
+    symbol_information::Kind as ScipSymbolKind,
+};
+
+use crate::analysis::symbols::{DMLSymbolKind, SymbolSource};
+use crate::analysis::structure::objects::{CompObjectKind, MethodModifier};
+use crate::analysis::templating::objects::{
+    DMLHierarchyMember, DMLNamedMember, DMLObject, StructureContainer,
+};
+use crate::analysis::DeviceAnalysis;
+use crate::analysis::IsolatedAnalysis;
+use crate::Span as ZeroSpan;
+use crate::file_management::CanonPath;
+
+use log::debug;
+
+/// Per-file import resolution data for SCIP export.
+///
+/// Maps each source file (canonical path) to its list of
+/// (import_statement_span, resolved_target_canonical_path) pairs.
+pub type FileImportData = HashMap<CanonPath, Vec<(ZeroSpan, CanonPath)>>;
+
+/// Convert a ZeroSpan range into the SCIP occurrence range format.
+///
+/// SCIP uses `[startLine, startChar, endLine, endChar]` (4 elements)
+/// or `[startLine, startChar, endChar]` (3 elements, same-line).
+/// All values are 0-based.
+fn span_to_scip_range(span: &ZeroSpan) -> Vec<i32> {
+    let range = &span.range;
+    // SCIP ranges are `int32`; the span coordinates are narrowed with `as`.
+    let (first_row, first_col) =
+        (range.row_start.0 as i32, range.col_start.0 as i32);
+    let (last_row, last_col) =
+        (range.row_end.0 as i32, range.col_end.0 as i32);
+
+    if first_row != last_row {
+        vec![first_row, first_col, last_row, last_col]
+    } else {
+        // Single-line span: the end line is omitted.
+        vec![first_row, first_col, last_col]
+    }
+}
+
+/// Map a DMLSymbolKind to a SCIP SymbolInformation Kind.
+fn dml_kind_to_scip_kind(kind: &DMLSymbolKind) -> ScipSymbolKind {
+    match kind {
+        // Interfaces are the only composite kind with a dedicated SCIP
+        // kind; every other composite object is an `Object`.
+        DMLSymbolKind::CompObject(CompObjectKind::Interface) =>
+            ScipSymbolKind::Interface,
+        DMLSymbolKind::CompObject(_) => ScipSymbolKind::Object,
+        DMLSymbolKind::Parameter
+        | DMLSymbolKind::Constant
+        | DMLSymbolKind::Loggroup => ScipSymbolKind::Constant,
+        DMLSymbolKind::Extern
+        | DMLSymbolKind::Local
+        | DMLSymbolKind::Saved
+        | DMLSymbolKind::Session => ScipSymbolKind::Variable,
+        DMLSymbolKind::Hook => ScipSymbolKind::Event,
+        DMLSymbolKind::Method => ScipSymbolKind::Method,
+        DMLSymbolKind::MethodArg => ScipSymbolKind::Parameter,
+        DMLSymbolKind::Template => ScipSymbolKind::Class,
+        DMLSymbolKind::Typedef => ScipSymbolKind::TypeAlias,
+    }
+}
+
+/// Sanitize a name for use in SCIP symbol strings.
+///
+/// SCIP descriptors allow backtick-escaping of names that contain
+/// non-identifier characters; to keep things simple every character
+/// outside `[a-zA-Z0-9_]` is replaced by `_` instead.
+fn sanitize_name(name: &str) -> String {
+    let mut clean = String::with_capacity(name.len());
+    for ch in name.chars() {
+        let keep = ch == '_' || ch.is_ascii_alphanumeric();
+        clean.push(if keep { ch } else { '_' });
+    }
+    clean
+}
+
+/// Build a SCIP symbol string representing a DML source file.
+///
+/// File symbols use the path relative to the project root (or the
+/// full path for external files) as the descriptor, with dots and
+/// slashes sanitized.
+fn make_file_symbol(path: &Path, project_root: &Path) -> String {
+    let shown = path.strip_prefix(project_root)
+        .unwrap_or(path)
+        .to_string_lossy();
+    let mut symbol = String::from("dml simics . . ");
+    // Every non-empty path segment becomes one term descriptor
+    // (`segment.`); characters outside `[a-zA-Z0-9_]` turn into `_`.
+    let segments = shown
+        .split(|c| c == '/' || c == '\\')
+        .filter(|segment| !segment.is_empty());
+    for segment in segments {
+        symbol.extend(segment.chars().map(
+            |c| if c.is_ascii_alphanumeric() || c == '_' { c } else { '_' }));
+        symbol.push('.');
+    }
+    symbol
+}
+
+/// Collect, for every file the given devices depend on, the
+/// `(import_span, resolved_path)` pairs of its import statements.
+///
+/// Import declarations come from the file's IsolatedAnalysis; their
+/// targets are looked up in `import_map`, first under the device's own
+/// context and then under the `None` context. A file shared by several
+/// devices is resolved once, using the first device that lists it.
+/// Imports without a resolution are left out; files without an isolated
+/// analysis get an empty list.
+// NOTE(review): the type arguments of the two map parameters were lost in
+// extraction and are left exactly as they appear in the extracted text;
+// restore them from the real patch.
+pub fn extract_import_data(
+    isolated_analyses: &HashMap>,
+    import_map: &HashMap, HashMap>>,
+    devices: &[&DeviceAnalysis],
+) -> FileImportData {
+    let mut per_file = FileImportData::new();
+    for device in devices {
+        let device_context = Some(device.path.clone());
+        for file_path in &device.dependant_files {
+            if per_file.contains_key(file_path) {
+                continue;
+            }
+            let mut imports = Vec::new();
+            if let Some(analysis) = isolated_analyses.get(file_path) {
+                let context_map = import_map.get(file_path);
+                let resolved = context_map
+                    .and_then(|cm| cm.get(&device_context))
+                    .or_else(|| context_map.and_then(|cm| cm.get(&None)));
+                for import_decl in analysis.stored.get_imports() {
+                    let import = &import_decl.obj;
+                    let target = resolved
+                        .and_then(|paths| paths.get(import))
+                        .and_then(|resolved_str| CanonPath::from_path_buf(
+                            PathBuf::from(resolved_str)));
+                    if let Some(canon) = target {
+                        imports.push((import.span, canon));
+                    }
+                }
+            }
+            per_file.insert(file_path.clone(), imports);
+        }
+    }
+    per_file
+}
+
+/// Build a `local` SCIP symbol string (document-scoped).
+///
+/// Used for method arguments, method locals, and other symbols that
+/// are only visible within a single file scope.
+fn make_local_symbol(name: &str, id: u64) -> String {
+    let clean = sanitize_name(name);
+    format!("local {}_{}", clean, id)
+}
+
+/// Build a global SCIP symbol string from a qualified path.
+///
+/// Global symbols use the format:
+/// `scheme ' ' manager ' ' package ' ' version ' ' descriptors...`
+///
+/// We use:
+/// - scheme: `dml`
+/// - manager: `simics`
+/// - package: device name
+/// - version: `.` (single dot = no version)
+/// - descriptors: built from the `.`-separated segments of
+///   `qualified_path`
+///
+/// SCIP descriptor suffixes, as emitted here:
+/// - `.` = term: every enclosing segment, and a final segment of any
+///   kind other than method or template
+/// - `#` = type: final segment of a template
+/// - `().` = method: final segment of a method
+fn make_global_symbol(device_name: &str, qualified_path: &str,
+                      kind: &DMLSymbolKind) -> String {
+    // Split off the final segment; everything before it names enclosing
+    // object instances (device, bank, register, ...).
+    let (enclosing, leaf) = match qualified_path.rsplit_once('.') {
+        Some((parents, last)) => (Some(parents), last),
+        None => (None, qualified_path),
+    };
+
+    let mut descriptors = String::new();
+    if let Some(parents) = enclosing {
+        for segment in parents.split('.') {
+            descriptors.push_str(&sanitize_name(segment));
+            descriptors.push('.');
+        }
+    }
+
+    // Only the final segment's suffix depends on the symbol kind.
+    let suffix = match kind {
+        DMLSymbolKind::Method => "().",
+        // Templates are the type-like concept in DML
+        DMLSymbolKind::Template => "#",
+        // Composite objects and named values are instances, not types
+        _ => ".",
+    };
+    descriptors.push_str(&sanitize_name(leaf));
+    descriptors.push_str(suffix);
+
+    format!("dml simics {} . {}", sanitize_name(device_name), descriptors)
+}
+
+/// Build the SCIP symbol string for a given SymbolSource.
+///
+/// - DMLObject (comp or shallow): uses global symbol with qualified_name()
+/// - Method: uses global symbol with parent's qualified_name + method name
+/// - Template: uses global symbol at top level
+/// - MethodArg / MethodLocal: uses local symbol
+/// - Type: returns None (these are skipped)
+///
+/// Returns `Some((scip_symbol, display_name))`, or `None` for type
+/// symbols and for composite objects whose key is not in `container`.
+fn scip_symbol_for_source(
+    source: &SymbolSource,
+    kind: &DMLSymbolKind,
+    id: u64,
+    device_name: &str,
+    container: &StructureContainer,
+) -> Option<(String, String)> {
+    match source {
+        SymbolSource::DMLObject(DMLObject::CompObject(key)) => {
+            container.get(*key).map(|comp| {
+                let qname = comp.qualified_name(container);
+                let display = comp.identity().to_string();
+                (make_global_symbol(device_name, &qname, kind), display)
+            })
+        }
+        SymbolSource::DMLObject(DMLObject::ShallowObject(shallow)) => {
+            let qname = shallow.qualified_name(container);
+            let display = shallow.identity().to_string();
+            Some((make_global_symbol(device_name, &qname, kind), display))
+        }
+        SymbolSource::Method(parent_key, methref) => {
+            // A method whose parent cannot be resolved is named by its
+            // bare identifier.
+            let parent_qname = container.get(*parent_key)
+                .map(|p| p.qualified_name(container))
+                .unwrap_or_default();
+            let method_name = methref.identity();
+            let qname = if parent_qname.is_empty() {
+                method_name.to_string()
+            } else {
+                format!("{}.{}", parent_qname, method_name)
+            };
+            let sym = make_global_symbol(
+                device_name, &qname, &DMLSymbolKind::Method);
+            Some((sym, method_name.to_string()))
+        }
+        SymbolSource::Template(templ) => {
+            let sym = make_global_symbol(
+                device_name, &templ.name, &DMLSymbolKind::Template);
+            Some((sym, templ.name.clone()))
+        }
+        SymbolSource::MethodArg(_, name) => {
+            Some((make_local_symbol(&name.val, id), name.val.clone()))
+        }
+        SymbolSource::MethodLocal(_, name) => {
+            Some((make_local_symbol(&name.val, id), name.val.clone()))
+        }
+        SymbolSource::Type(_) => None,
+    }
+}
+
+/// Build a short-form declaration signature for a DML symbol.
+///
+/// For composite objects this is just the object kind keyword
+/// (e.g. `"register"`, `"bank"`).
+/// For methods this is the modifier keywords from the declaration
+/// (e.g. `"independent method default"`, `"shared method throws"`).
+/// Other symbol kinds currently produce no documentation.
+fn make_documentation(
+    source: &SymbolSource,
+    container: &StructureContainer,
+) -> Vec<String> {
+    match source {
+        SymbolSource::DMLObject(DMLObject::CompObject(key)) => {
+            container.get(*key)
+                .map(|comp| vec![comp.kind.kind_name().to_string()])
+                .unwrap_or_default()
+        }
+        SymbolSource::Method(_, methref) => {
+            let decl = methref.get_decl();
+            let modifier = match decl.modifier {
+                MethodModifier::Shared => Some("shared"),
+                MethodModifier::Inline => Some("inline"),
+                MethodModifier::None => None,
+            };
+            // Keywords in declaration order; absent ones are `None`.
+            let keywords = [
+                decl.independent.then_some("independent"),
+                modifier,
+                Some("method"),
+                decl.default.then_some("default"),
+                decl.throws.then_some("throws"),
+            ];
+            let parts: Vec<&str> =
+                keywords.iter().flatten().copied().collect();
+            vec![parts.join(" ")]
+        }
+        _ => vec![],
+    }
+}
+
+/// Build a map from definition/declaration name locations to their
+/// enclosing AST spans, for use as SCIP `enclosing_range`.
+///
+/// For composite objects, each ObjectSpec has a `loc` (name span) and
+/// a `span` (full `group foo is bar { ... }` range). For methods,
+/// the MethodDecl has a name location and a full declaration span.
+fn enclosing_ranges_for_source(
+    source: &SymbolSource,
+    container: &StructureContainer,
+) -> HashMap<ZeroSpan, ZeroSpan> {
+    let mut map = HashMap::new();
+    match source {
+        SymbolSource::DMLObject(DMLObject::CompObject(key)) => {
+            if let Some(comp) = container.get(*key) {
+                for spec in &comp.all_decls {
+                    map.insert(spec.loc, spec.span);
+                }
+                // definitions may include specs not in all_decls; an
+                // entry already recorded from all_decls is kept.
+                for spec in &comp.definitions {
+                    map.entry(spec.loc).or_insert(spec.span);
+                }
+            }
+        }
+        SymbolSource::Method(_, methref) => {
+            let decl = methref.get_decl();
+            map.insert(decl.name.span, decl.span);
+        }
+        _ => {}
+    }
+    map
+}
+
+/// Holds per-file occurrence and symbol information data
+/// that will be assembled into SCIP Documents.
+///
+/// Uses HashMaps keyed by dedup keys so that duplicate entries
+/// from multiple device analyses are naturally collapsed.
+#[derive(Default)]
+struct FileData {
+    /// Occurrences keyed by (symbol, range, roles) to avoid duplicates.
+    occurrences: HashMap<(String, Vec<i32>, i32), Occurrence>,
+    /// SymbolInformation keyed by SCIP symbol string.
+    symbols: HashMap<String, SymbolInformation>,
+}
+
+impl FileData {
+    /// Insert an occurrence, deduplicating by (symbol, range, roles).
+    /// The first occurrence recorded for a key is kept.
+    fn add_occurrence(&mut self, occ: Occurrence) {
+        let key = (
+            occ.symbol.clone(),
+            occ.range.clone(),
+            occ.symbol_roles,
+        );
+        self.occurrences.entry(key).or_insert(occ);
+    }
+
+    /// Insert a SymbolInformation entry, deduplicating by symbol string.
+    /// The first entry recorded for a symbol is kept.
+    fn add_symbol_info(&mut self, sym_info: SymbolInformation) {
+        self.symbols.entry(sym_info.symbol.clone()).or_insert(sym_info);
+    }
+
+    /// Consume the collected data and return `(occurrences, symbols)` in
+    /// a deterministic order.
+    ///
+    /// Occurrences are ordered by range, then symbol, then role bits.
+    /// Range alone is not a total order (several occurrences can share a
+    /// range), and the values come out of a HashMap in arbitrary order,
+    /// so the tie-breakers are needed for reproducible output. Symbols
+    /// are ordered by symbol string, which is unique per entry.
+    fn into_vecs(self) -> (Vec<Occurrence>, Vec<SymbolInformation>) {
+        let mut occs: Vec<_> = self.occurrences.into_values().collect();
+        occs.sort_by(|a, b| {
+            a.range.cmp(&b.range)
+                .then_with(|| a.symbol.cmp(&b.symbol))
+                .then_with(|| a.symbol_roles.cmp(&b.symbol_roles))
+        });
+        let mut syms: Vec<_> = self.symbols.into_values().collect();
+        syms.sort_by(|a, b| a.symbol.cmp(&b.symbol));
+        (occs, syms)
+    }
+}
+
+/// Convert a single DeviceAnalysis into SCIP Documents.
+///
+/// Returns a tuple of (documents, external_symbols).
+/// Files under the project root become Documents with relative paths;
+/// files outside (e.g. Simics builtins) contribute only their
+/// SymbolInformation to `external_symbols` for hover/navigation support.
+///
+/// Both returned vectors are sorted (documents by relative path,
+/// external symbols by symbol string) so that the result does not
+/// depend on hash-map iteration order.
+fn device_analysis_to_documents(
+    device: &DeviceAnalysis,
+    project_root: &Path,
+    import_data: &FileImportData,
+) -> (Vec<Document>, Vec<SymbolInformation>) {
+    /// Build an occurrence of `symbol` at `span` with the given role
+    /// bits, attaching `enclosing` as enclosing range when present.
+    fn occurrence(span: &ZeroSpan, symbol: &str, roles: i32,
+                  enclosing: Option<&ZeroSpan>) -> Occurrence {
+        let mut occ = Occurrence::new();
+        occ.range = span_to_scip_range(span);
+        occ.symbol = symbol.to_string();
+        occ.symbol_roles = roles;
+        if let Some(enc) = enclosing {
+            occ.enclosing_range = span_to_scip_range(enc);
+        }
+        occ
+    }
+
+    let definition = SymbolRole::Definition.value();
+    let forward_definition = SymbolRole::ForwardDefinition.value();
+
+    let mut file_data: HashMap<PathBuf, FileData> = HashMap::new();
+    let container = &device.objects;
+    let device_name = &device.name;
+
+    // Iterate over all symbols in the device analysis
+    for symbol_ref in device.symbol_info.all_symbols() {
+        let sym = symbol_ref.symbol.lock().unwrap();
+
+        // Build the SCIP symbol and display name from the source
+        let (scip_symbol, display_name) = match scip_symbol_for_source(
+            &sym.source, &sym.kind, sym.id, device_name, container,
+        ) {
+            Some(pair) => pair,
+            None => continue, // Type symbols and unresolvable objects
+        };
+
+        debug!("SCIP symbol id={} kind={:?} scip={} defs={} decls={} refs={} impls={}",
+               sym.id, sym.kind, &scip_symbol,
+               sym.definitions.len(), sym.declarations.len(),
+               sym.references.len(), sym.implementations.len());
+
+        let kind = dml_kind_to_scip_kind(&sym.kind);
+        let documentation = make_documentation(&sym.source, container);
+        let enclosing = enclosing_ranges_for_source(&sym.source, container);
+
+        // Record the primary location as a definition occurrence
+        {
+            let data = file_data.entry(sym.loc.path()).or_default();
+            data.add_occurrence(occurrence(
+                &sym.loc, &scip_symbol, definition,
+                enclosing.get(&sym.loc)));
+
+            // Add SymbolInformation for this symbol (only once, at def site)
+            let mut sym_info = SymbolInformation::new();
+            sym_info.symbol = scip_symbol.clone();
+            sym_info.kind = kind.into();
+            sym_info.display_name = display_name;
+            sym_info.documentation = documentation;
+
+            // For comp objects, add Relationship entries for each
+            // instantiated template (`is` declarations).
+            if let SymbolSource::DMLObject(
+                DMLObject::CompObject(key)) = &sym.source {
+                if let Some(comp) = container.get(*key) {
+                    for templ_name in comp.templates.keys() {
+                        let mut rel = Relationship::new();
+                        rel.symbol = make_global_symbol(
+                            device_name, templ_name,
+                            &DMLSymbolKind::Template);
+                        rel.is_implementation = true;
+                        sym_info.relationships.push(rel);
+                    }
+                }
+            }
+
+            sym_info.relationships.sort_by(|a, b| a.symbol.cmp(&b.symbol));
+            data.add_symbol_info(sym_info);
+        }
+
+        // Record additional definitions
+        for def_span in &sym.definitions {
+            // Skip if same as primary loc
+            if *def_span == sym.loc {
+                continue;
+            }
+            file_data.entry(def_span.path()).or_default()
+                .add_occurrence(occurrence(
+                    def_span, &scip_symbol, definition,
+                    enclosing.get(def_span)));
+        }
+
+        // Record declarations
+        for decl_span in &sym.declarations {
+            if *decl_span == sym.loc {
+                continue;
+            }
+            // If this declaration site also appears in definitions,
+            // it defines a value and gets the Definition role.
+            // Otherwise it's an abstract/forward declaration.
+            let roles = if sym.definitions.contains(decl_span) {
+                definition
+            } else {
+                forward_definition
+            };
+            file_data.entry(decl_span.path()).or_default()
+                .add_occurrence(occurrence(
+                    decl_span, &scip_symbol, roles,
+                    enclosing.get(decl_span)));
+        }
+
+        // Record references as plain occurrences (no role bits).
+        // TODO: narrow down to ReadAccess/WriteAccess once the
+        // analysis tracks access kinds.
+        for ref_span in &sym.references {
+            file_data.entry(ref_span.path()).or_default()
+                .add_occurrence(occurrence(ref_span, &scip_symbol, 0, None));
+        }
+
+        // Record implementation sites (`is template` occurrences).
+        // These are references to the template, not definitions: the
+        // implementation relationship itself is expressed via
+        // Relationship entries on the comp object's SymbolInformation,
+        // not via occurrence roles.
+        for impl_span in &sym.implementations {
+            file_data.entry(impl_span.path()).or_default()
+                .add_occurrence(occurrence(impl_span, &scip_symbol, 0, None));
+        }
+    }
+
+    // Emit file-level symbols and import occurrences.
+    //
+    // For each file in the device analysis, we create a file-level
+    // symbol (with a Definition occurrence at line 0) and then emit
+    // Import occurrences at each `import "..."` statement pointing
+    // to the imported file's symbol.
+    for dep_path in &device.dependant_files {
+        let file_pathbuf: PathBuf = dep_path.clone().into();
+        let file_sym = make_file_symbol(&file_pathbuf, project_root);
+
+        // Definition occurrence at line 0 of the file
+        let data = file_data.entry(file_pathbuf.clone()).or_default();
+        let mut def_occ = Occurrence::new();
+        def_occ.range = vec![0, 0, 0]; // line 0, char 0, end char 0
+        def_occ.symbol = file_sym.clone();
+        def_occ.symbol_roles = definition;
+        data.add_occurrence(def_occ);
+
+        // SymbolInformation for the file
+        let mut sym_info = SymbolInformation::new();
+        sym_info.symbol = file_sym.clone();
+        sym_info.kind = ScipSymbolKind::File.into();
+        sym_info.display_name = file_pathbuf.file_name()
+            .map(|n| n.to_string_lossy().to_string())
+            .unwrap_or_default();
+        data.add_symbol_info(sym_info);
+
+        // Import occurrences for each `import "..."` in this file
+        if let Some(imports) = import_data.get(dep_path) {
+            for (import_span, resolved_path) in imports {
+                let target_pathbuf: PathBuf = resolved_path.clone().into();
+                let target_sym = make_file_symbol(&target_pathbuf, project_root);
+                data.add_occurrence(occurrence(
+                    import_span, &target_sym,
+                    SymbolRole::Import.value(), None));
+            }
+        }
+    }
+
+    // Assemble Documents, separating in-project from external files.
+    let mut documents = Vec::new();
+    let mut external_symbols = Vec::new();
+
+    for (path, data) in file_data {
+        let (occs, syms) = data.into_vecs();
+        match path.strip_prefix(project_root) {
+            Ok(rel) => {
+                let mut doc = Document::new();
+                doc.relative_path = rel.to_string_lossy().to_string();
+                doc.language = "dml".to_string();
+                doc.position_encoding =
+                    PositionEncoding::UTF16CodeUnitOffsetFromLineStart.into();
+                doc.occurrences = occs;
+                doc.symbols = syms;
+                documents.push(doc);
+            }
+            Err(_) => {
+                // External file: keep symbol info for hover/navigation
+                // but don't emit a document or occurrences
+                external_symbols.extend(syms);
+            }
+        }
+    }
+
+    // `file_data` is a HashMap, so the loop above visits files in an
+    // arbitrary order; sort to make the output reproducible.
+    documents.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
+
+    // Remove from external_symbols any symbol that already appears
+    // in a document. This can happen when multiple internal Symbol
+    // objects (e.g. from different templates) produce the same SCIP
+    // symbol string but have their primary locations in different
+    // files — one in-project and one external.
+    let doc_symbol_strings: HashSet<&str> = documents.iter()
+        .flat_map(|doc| doc.symbols.iter().map(|s| s.symbol.as_str()))
+        .collect();
+    external_symbols.retain(|s| !doc_symbol_strings.contains(s.symbol.as_str()));
+    external_symbols.sort_by(|a, b| a.symbol.cmp(&b.symbol));
+
+    (documents, external_symbols)
+}
+
+/// Build a complete SCIP Index from one or more DeviceAnalyses.
+///
+/// Documents are sorted by relative path and external symbols by symbol string.
+///
+/// # Arguments
+/// * `devices` - The device analyses to export
+/// * `project_root` - The workspace root path, used to compute relative paths
+/// * `import_data` - Resolved imports per file, used to emit import occurrences
+pub fn build_scip_index(
+    devices: &[&DeviceAnalysis],
+    project_root: &Path,
+    import_data: &FileImportData,
+) -> Index {
+    debug!("Building SCIP index for {} device(s) rooted at {:?}", devices.len(), project_root);
+
+    let mut tool_info = ToolInfo::new();
+    tool_info.name = "dls".to_string();
+    tool_info.version = crate::version();
+
+    let mut metadata = Metadata::new();
+    metadata.tool_info = MessageField::some(tool_info);
+    // NOTE(review): the path is not percent-encoded here - confirm consumers accept that.
+    let root_str = project_root.to_string_lossy();
+    let sep = if root_str.ends_with('/') { "" } else { "/" };
+    metadata.project_root = format!("file://{}{}", root_str, sep);
+    metadata.text_document_encoding = scip::types::TextEncoding::UTF8.into();
+
+    // Collect documents from all devices, merging by relative_path.
+    // We use FileData for deduplication across devices: the same symbol
+    // or occurrence can appear in multiple DeviceAnalyses when they
+    // share source files (e.g. common library code).
+    let mut merged: HashMap<String, (Document, FileData)> = HashMap::new();
+    let mut ext_dedup = FileData::default();
+
+    for device in devices {
+        let (docs, ext_syms) = device_analysis_to_documents(device, project_root, import_data);
+        for doc in docs {
+            let (_, dedup) = merged.entry(doc.relative_path.clone()).or_insert_with(|| {
+                let mut d = Document::new();
+                d.relative_path = doc.relative_path.clone();
+                d.language = doc.language.clone();
+                d.position_encoding = doc.position_encoding;
+                (d, FileData::default())
+            });
+            for occ in doc.occurrences {
+                dedup.add_occurrence(occ);
+            }
+            for sym in doc.symbols {
+                dedup.add_symbol_info(sym);
+            }
+        }
+        for sym in ext_syms {
+            ext_dedup.add_symbol_info(sym);
+        }
+    }
+
+    // Move deduplicated data into the final documents, sorted for
+    // deterministic output.
+    let mut documents: Vec<Document> = merged.into_values().map(|(mut doc, dedup)| {
+        let (occs, syms) = dedup.into_vecs();
+        doc.occurrences = occs;
+        doc.symbols = syms;
+        doc
+    }).collect();
+    documents.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
+
+    let mut index = Index::new();
+    index.metadata = MessageField::some(metadata);
+    index.documents = documents;
+    let (_, mut ext_syms) = ext_dedup.into_vecs();
+    ext_syms.sort_by(|a, b| a.symbol.cmp(&b.symbol));
+    index.external_symbols = ext_syms;
+
+    debug!("SCIP index built with {} document(s)", index.documents.len());
+    index
+}
+
+/// Write a SCIP index to a file.
+///
+/// # Errors
+/// Fails with "Failed to write SCIP index: <cause>" when the index cannot be written.
+pub fn write_scip_to_file(index: Index, output_path: &Path) -> Result<(), String> {
+    debug!("Writing SCIP index to {:?}", output_path);
+    scip::write_message_to_file(output_path, index)
+        .map_err(|e| format!("Failed to write SCIP index: {}", e))
+}
diff --git a/src/server/dispatch.rs b/src/server/dispatch.rs index 514da63..afc2d1a 100644 --- a/src/server/dispatch.rs +++ b/src/server/dispatch.rs @@ -113,6 +113,7 @@ define_dispatch_request_enum!( ExecuteCommand, CodeLensRequest, GetKnownContextsRequest, + ExportScipRequest, ); /// Provides ability to dispatch requests to a worker thread that will diff --git a/src/server/mod.rs b/src/server/mod.rs index 056e8dd..b27d2b5 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -594,7 +594,8 @@ impl LsService { requests::References, requests::Completion, requests::CodeLensRequest, - requests::GetKnownContextsRequest; + requests::GetKnownContextsRequest, + requests::ExportScipRequest; ); Ok(()) }