HtmlJobMetadataExtractor.java
package com.wilzwert.myjobs.core.domain.model.job.ports.driven.extractor.impl;
import com.wilzwert.myjobs.core.domain.model.job.JobMetadata;
import com.wilzwert.myjobs.core.domain.model.job.ports.driven.extractor.JobMetadataExtractor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.util.Optional;
/**
* @author Wilhelm Zwertvaegher
*/
public class HtmlJobMetadataExtractor implements JobMetadataExtractor {
@Override
public Optional<JobMetadata> extractJobMetadata(String html) {
Document document = Jsoup.parse(html);
String title = document.title();
String description = document.select("meta[name=description]").attr("content");
String h1 = document.select("h1").text();
if(title.isEmpty() && description.isEmpty() && h1.isEmpty()) {
return Optional.empty();
}
return Optional.of(new JobMetadata(title.isEmpty() ? h1 : title, null, null, description, null, null));
}
@Override
public boolean isCompatible(String domain) {
return true;
}
}