|
27 | 27 | use nom::branch::alt; |
28 | 28 | use nom::bytes::complete::{tag, take_until}; |
29 | 29 | use nom::character::complete::{ |
30 | | - alphanumeric1, line_ending, multispace1, not_line_ending, space0, space1, |
| 30 | + line_ending, multispace1, not_line_ending, space0, space1, |
31 | 31 | }; |
| 32 | +use nom_unicode::complete::alphanumeric1; |
32 | 33 | use nom::combinator::{all_consuming, opt, recognize, value, verify}; |
33 | 34 | use nom::multi::{many0, many1, separated_list1}; |
34 | 35 | use nom::sequence::{delimited, preceded, terminated}; |
@@ -61,12 +62,28 @@ impl Import { |
61 | 62 | } |
62 | 63 | } |
63 | 64 |
|
64 | | -pub fn parse_imports(s: &str) -> Result<Vec<Import>, String> { |
65 | | - let s = Span::new(s); |
66 | | - let (_, result) = all_consuming(parse_block(false)) |
67 | | - .parse(s) |
| 65 | +pub fn parse_imports(python_file_contents: &str) -> Result<Vec<Import>, String> { |
| 66 | + let span = Span::new(python_file_contents); |
| 67 | + let (_, imports) = all_consuming(parse_block(false)) |
| 68 | + .parse(span) |
68 | 69 | .map_err(|e| e.to_string())?; |
69 | | - Ok(result) |
| 70 | + Ok(with_corrected_line_contents(python_file_contents, imports)) |
| 71 | +} |
| 72 | + |
| 73 | +// Return the imports, but with the full line contents. |
| 74 | +// Currently our nom parsing only pulls out to the latest token, so we correct it here |
| 75 | +// by finding the corresponding whole line, based on the line number. |
| 76 | +// TODO: adjust the parsing code so it figures it out correctly in the first place. |
| 77 | +fn with_corrected_line_contents(python_file_contents: &str, imports: Vec<Import>) -> Vec<Import> { |
| 78 | + let lines: Vec<&str> = python_file_contents.lines().collect(); |
| 79 | + imports.into_iter().map( |
| 80 | + |import| Import { |
| 81 | + imported_object: import.imported_object, |
| 82 | + line_number: import.line_number, |
| 83 | + line_contents: lines[(import.line_number as usize) - 1].trim_start().to_string(), |
| 84 | + typechecking_only: import.typechecking_only, |
| 85 | + } |
| 86 | + ).collect() |
70 | 87 | } |
71 | 88 |
|
72 | 89 | fn parse_block(typechecking_only: bool) -> impl Fn(Span) -> IResult<Span, Vec<Import>> { |
@@ -276,8 +293,18 @@ fn parse_relative_module(s: Span) -> IResult<Span, &str> { |
276 | 293 | Ok((s, result.fragment())) |
277 | 294 | } |
278 | 295 |
|
| 296 | + |
| 297 | +// Parse a valid Python identifier. |
| 298 | +// |
| 299 | +// Note this is not implemented as thoroughly as in the Python spec. |
| 300 | +// Some identifiers will be valid here (e.g. ones that begin with digits) |
| 301 | +// that aren't actually valid in Python. Unicode identifiers are supported. |
| 302 | +// |
| 303 | +// See https://docs.python.org/3/reference/lexical_analysis.html#identifiers |
279 | 304 | fn parse_identifier(s: Span) -> IResult<Span, &str> { |
280 | | - let (s, result) = recognize(many1(alt((alphanumeric1, tag("_"))))).parse(s)?; |
| 305 | + let (s, result) = recognize( |
| 306 | + many1(alt((alphanumeric1, tag("_")))) |
| 307 | + ).parse(s)?; |
281 | 308 | Ok((s, result.fragment())) |
282 | 309 | } |
283 | 310 |
|
@@ -310,11 +337,17 @@ fn parse_space1(s: Span) -> IResult<Span, ()> { |
310 | 337 | Ok((s, ())) |
311 | 338 | } |
312 | 339 |
|
| 340 | + |
313 | 341 | fn parse_if_typechecking(s: Span) -> IResult<Span, Vec<Import>> { |
314 | 342 | let (s, _) = ( |
315 | 343 | tag("if"), |
316 | 344 | parse_space1, |
317 | | - alt((tag("TYPE_CHECKING"), tag("typing.TYPE_CHECKING"))), |
| 345 | + alt( |
| 346 | + ( |
| 347 | + tag("TYPE_CHECKING"), |
| 348 | + preceded(parse_identifier, tag(".TYPE_CHECKING")), |
| 349 | + ), |
| 350 | + ), |
318 | 351 | parse_space0, |
319 | 352 | tag(":"), |
320 | 353 | ) |
@@ -575,6 +608,18 @@ import baz |
575 | 608 |
|
576 | 609 | (r#" |
577 | 610 | import foo |
| 611 | +if t.TYPE_CHECKING: import bar |
| 612 | +import baz |
| 613 | +"#, &[("foo", false), ("bar", true), ("baz", false)]), |
| 614 | +
|
| 615 | + (r#" |
| 616 | +import foo |
| 617 | +if some_WE1RD_alias.TYPE_CHECKING: import bar |
| 618 | +import baz |
| 619 | +"#, &[("foo", false), ("bar", true), ("baz", false)]), |
| 620 | +
|
| 621 | + (r#" |
| 622 | +import foo |
578 | 623 | if TYPE_CHECKING : import bar |
579 | 624 | import baz |
580 | 625 | "#, &[("foo", false), ("bar", true), ("baz", false)]), |
@@ -734,17 +779,27 @@ if TYPE_CHECKING: |
734 | 779 | let imports = parse_imports( |
735 | 780 | " |
736 | 781 | import a |
| 782 | +import a.b # Comment afterwards. |
737 | 783 | from b import c |
738 | 784 | from d import (e) |
739 | | -from f import *", |
| 785 | +from f import * |
| 786 | +from something.foo import * # Comment afterwards. |
| 787 | +if True: |
| 788 | + from indented import foo |
| 789 | +from ñon_ascii_变 import ラーメン |
| 790 | +", |
740 | 791 | ) |
741 | 792 | .unwrap(); |
742 | 793 | assert_eq!( |
743 | 794 | vec![ |
744 | 795 | ("a".to_owned(), "import a".to_owned()), |
| 796 | + ("a.b".to_owned(), "import a.b # Comment afterwards.".to_owned()), |
745 | 797 | ("b.c".to_owned(), "from b import c".to_owned()), |
746 | 798 | ("d.e".to_owned(), "from d import (e)".to_owned()), |
747 | 799 | ("f.*".to_owned(), "from f import *".to_owned()), |
| 800 | + ("something.foo.*".to_owned(), "from something.foo import * # Comment afterwards.".to_owned()), |
| 801 | + ("indented.foo".to_owned(), "from indented import foo".to_owned()), |
| 802 | + ("ñon_ascii_变.ラーメン".to_owned(), "from ñon_ascii_变 import ラーメン".to_owned()), |
748 | 803 | ], |
749 | 804 | imports |
750 | 805 | .into_iter() |
|
0 commit comments