Implement extracting HTML bodies

This commit is contained in:
Andrey Golovizin 2021-07-18 16:24:00 +02:00
parent 202736e0bd
commit b1e9a349c8
3 changed files with 41 additions and 47 deletions

View file

@ -3,7 +3,7 @@ name = "mailcat"
version = "0.1.0" version = "0.1.0"
edition = "2018" edition = "2018"
authors = ["Andrey Golovizin <ag@sologoc.com>"] authors = ["Andrey Golovizin <ag@sologoc.com>"]
license = "MIT" license = "GPL-3.0+"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View file

@ -29,9 +29,6 @@ fn main() -> Result<(), anyhow::Error> {
println!("Date: {}", date); println!("Date: {}", date);
} }
println!("---"); println!("---");
print!( print!("{}", message.body()?.text.unwrap_or_default());
"{}",
message.body()?.unwrap_or_default().text.unwrap_or_default()
);
Ok(()) Ok(())
} }

View file

@ -3,13 +3,33 @@ use mailparse::{DispositionType, MailHeaderMap, ParsedMail};
#[derive(PartialEq, Eq, Debug, Default)] #[derive(PartialEq, Eq, Debug, Default)]
pub struct Body { pub struct Body {
pub text: Option<String>, pub text: Option<String>,
pub html: Option<String>,
} }
pub trait ParsedMailExt { pub trait ParsedMailExt {
fn is_attachment(&self) -> bool; fn is_attachment(&self) -> bool;
fn subject(&self) -> Option<String>; fn subject(&self) -> Option<String>;
fn date(&self) -> Result<Option<chrono::DateTime<chrono::FixedOffset>>, anyhow::Error>; fn date(&self) -> Result<Option<chrono::DateTime<chrono::FixedOffset>>, anyhow::Error>;
fn body(&self) -> Result<Option<Body>, anyhow::Error>; fn body(&self) -> Result<Body, anyhow::Error>;
}
fn find_body(message: &ParsedMail, body: &mut Body) -> Result<(), anyhow::Error> {
if message.is_attachment() {
return Ok(());
}
let mimetype: mime::Mime = message.ctype.mimetype.parse()?;
if mimetype == mime::TEXT_PLAIN {
body.text.get_or_insert(message.get_body()?);
} else if mimetype == mime::TEXT_HTML {
body.html.get_or_insert(message.get_body()?);
}
for subpart in &message.subparts {
find_body(subpart, body)?;
if body.text.is_some() && body.html.is_some() {
return Ok(());
}
}
Ok(())
} }
impl<'a> ParsedMailExt for ParsedMail<'a> { impl<'a> ParsedMailExt for ParsedMail<'a> {
@ -29,22 +49,10 @@ impl<'a> ParsedMailExt for ParsedMail<'a> {
Ok(date) Ok(date)
} }
fn body(&self) -> Result<Option<Body>, anyhow::Error> { fn body(&self) -> Result<Body, anyhow::Error> {
let mimetype: mime::Mime = self.ctype.mimetype.parse()?; let mut body = Body::default();
if self.is_attachment() { find_body(self, &mut body)?;
return Ok(None); Ok(body)
}
if mimetype == mime::TEXT_PLAIN {
return Ok(Some(Body {
text: Some(self.get_body()?),
}));
}
for subpart in &self.subparts {
if let Some(body) = subpart.body()? {
return Ok(Some(body));
}
}
Ok(None)
} }
} }
@ -85,14 +93,7 @@ Prost=FD text.
Content-Transfer-Encoding: quoted-printable Content-Transfer-Encoding: quoted-printable
Content-Type: text/html; charset="UTF-8" Content-Type: text/html; charset="UTF-8"
<html> <strong>Tu=C4=8Dn=C3=BD HTML text</strong>
<head>
<meta http-equiv=3D"content-type" content=3D"text/html; charset=3DUTF-8">
</head>
<body><p style=3D"margin-top:0;margin-bottom:0;margin-left:0;margin-right:0=
;"><strong>N=C4=9Bjaky HTML text.</strong></p>
</body>
</html>
--nextPart3377186.iIbC2pHGDl-- --nextPart3377186.iIbC2pHGDl--
"#; "#;
@ -124,14 +125,7 @@ UHJvc3TDvSB0ZXh0Lg==
Content-Transfer-Encoding: quoted-printable Content-Transfer-Encoding: quoted-printable
Content-Type: text/html; charset="UTF-8" Content-Type: text/html; charset="UTF-8"
<html> <strong>Tu=C4=8Dn=C3=BD HTML text</strong>
<head>
<meta http-equiv=3D"content-type" content=3D"text/html; charset=3DUTF-8">
</head>
<body><p style=3D"margin-top:0;margin-bottom:0;margin-left:0;margin-right:0=
;"><strong>Tu=C4=8Dn=C3=BD HTML text</strong></p>
</body>
</html>
--nextPart5630828.MhkbZ0Pkbq-- --nextPart5630828.MhkbZ0Pkbq--
--nextPart1698715.VLH7GnMWUR --nextPart1698715.VLH7GnMWUR
@ -159,9 +153,10 @@ TsSbamFrw6EgcMWZw61sb2hhLgo=
); );
assert_eq!( assert_eq!(
message.body()?, message.body()?,
Some(Body { Body {
text: Some("Prostý text.".to_string()) text: Some("Prostý text.".to_string()),
}), html: None,
},
); );
Ok(()) Ok(())
} }
@ -180,9 +175,10 @@ TsSbamFrw6EgcMWZw61sb2hhLgo=
); );
assert_eq!( assert_eq!(
message.body()?, message.body()?,
Some(Body { Body {
text: Some("Prostý text.".to_string()) text: Some("Prostý text.".to_string()),
}), html: Some("<strong>Tučný HTML text</strong>".to_string()),
},
); );
Ok(()) Ok(())
} }
@ -204,9 +200,10 @@ TsSbamFrw6EgcMWZw61sb2hhLgo=
); );
assert_eq!( assert_eq!(
message.body()?, message.body()?,
Some(Body { Body {
text: Some("Prostý text.".to_string()) text: Some("Prostý text.".to_string()),
}), html: Some("<strong>Tučný HTML text</strong>".to_string()),
},
); );
Ok(()) Ok(())
} }