From b1e9a349c8e99e37726e972364522030883c705d Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Sun, 18 Jul 2021 16:24:00 +0200 Subject: [PATCH] Implement extracting HTML bodies --- Cargo.toml | 2 +- src/main.rs | 5 +--- src/parsing.rs | 81 ++++++++++++++++++++++++-------------------------- 3 files changed, 41 insertions(+), 47 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8841df8..2c93c30 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "mailcat" version = "0.1.0" edition = "2018" authors = ["Andrey Golovizin "] -license = "MIT" +license = "GPL-3.0+" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/src/main.rs b/src/main.rs index 1c0161c..5379a9f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,9 +29,6 @@ fn main() -> Result<(), anyhow::Error> { println!("Date: {}", date); } println!("---"); - print!( - "{}", - message.body()?.unwrap_or_default().text.unwrap_or_default() - ); + print!("{}", message.body()?.text.unwrap_or_default()); Ok(()) } diff --git a/src/parsing.rs b/src/parsing.rs index d0d399c..7fbeb05 100644 --- a/src/parsing.rs +++ b/src/parsing.rs @@ -3,13 +3,33 @@ use mailparse::{DispositionType, MailHeaderMap, ParsedMail}; #[derive(PartialEq, Eq, Debug, Default)] pub struct Body { pub text: Option, + pub html: Option, } pub trait ParsedMailExt { fn is_attachment(&self) -> bool; fn subject(&self) -> Option; fn date(&self) -> Result>, anyhow::Error>; - fn body(&self) -> Result, anyhow::Error>; + fn body(&self) -> Result; +} + +fn find_body(message: &ParsedMail, body: &mut Body) -> Result<(), anyhow::Error> { + if message.is_attachment() { + return Ok(()); + } + let mimetype: mime::Mime = message.ctype.mimetype.parse()?; + if mimetype == mime::TEXT_PLAIN { + body.text.get_or_insert(message.get_body()?); + } else if mimetype == mime::TEXT_HTML { + body.html.get_or_insert(message.get_body()?); + } + for subpart in &message.subparts { + find_body(subpart, body)?; + if body.text.is_some() && body.html.is_some() { + return Ok(()); + } + } + Ok(()) } impl<'a> ParsedMailExt for ParsedMail<'a> { @@ -29,22 +49,10 @@ impl<'a> ParsedMailExt for ParsedMail<'a> { Ok(date) } - fn body(&self) -> Result, anyhow::Error> { - let mimetype: mime::Mime = self.ctype.mimetype.parse()?; - if self.is_attachment() { - return Ok(None); - } - if mimetype == mime::TEXT_PLAIN { - return Ok(Some(Body { - text: Some(self.get_body()?), - })); - } - for subpart in &self.subparts { - if let Some(body) = subpart.body()? { - return Ok(Some(body)); - } - } - Ok(None) + fn body(&self) -> Result { + let mut body = Body::default(); + find_body(self, &mut body)?; + Ok(body) } } @@ -85,14 +93,7 @@ Prost=FD text. Content-Transfer-Encoding: quoted-printable Content-Type: text/html; charset="UTF-8" - - - - -

N=C4=9Bjaky HTML text.

- - +Tu=C4=8Dn=C3=BD HTML text --nextPart3377186.iIbC2pHGDl-- "#; @@ -124,14 +125,7 @@ UHJvc3TDvSB0ZXh0Lg== Content-Transfer-Encoding: quoted-printable Content-Type: text/html; charset="UTF-8" - - - - -

Tu=C4=8Dn=C3=BD HTML text

- - +Tu=C4=8Dn=C3=BD HTML text --nextPart5630828.MhkbZ0Pkbq-- --nextPart1698715.VLH7GnMWUR @@ -159,9 +153,10 @@ TsSbamFrw6EgcMWZw61sb2hhLgo= ); assert_eq!( message.body()?, - Some(Body { - text: Some("Prostý text.".to_string()) - }), + Body { + text: Some("Prostý text.".to_string()), + html: None, + }, ); Ok(()) } @@ -180,9 +175,10 @@ TsSbamFrw6EgcMWZw61sb2hhLgo= ); assert_eq!( message.body()?, - Some(Body { - text: Some("Prostý text.".to_string()) - }), + Body { + text: Some("Prostý text.".to_string()), + html: Some("Tučný HTML text".to_string()), + }, ); Ok(()) } @@ -204,9 +200,10 @@ TsSbamFrw6EgcMWZw61sb2hhLgo= ); assert_eq!( message.body()?, - Some(Body { - text: Some("Prostý text.".to_string()) - }), + Body { + text: Some("Prostý text.".to_string()), + html: Some("Tučný HTML text".to_string()), + }, ); Ok(()) }