Skip to content

Commit 5f36177

Browse files
committed
refactor: update code to get site parsers from database.
BREAKING CHANGE: site parsers from application.properties are now ignored. It's supposed that they have been migrated by running application with the code from previous commit. Otherwise, administrator can migrate them manually. Addressed to #975
1 parent 338a5b2 commit 5f36177

File tree

9 files changed

+38
-70
lines changed

9 files changed

+38
-70
lines changed

docs/diagrams/parse-page.png

-12.9 KB
Loading

docs/diagrams/parse-page.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
title Series import flow: stage 3 (parse page)
22

33
participant DownloadingSucceededEventListener
4+
participant SiteParserService
45
participant SeriesImportService
56
participant SiteParser
67
participant ExtractorService
78
participant SeriesSalesImportService
89
participant EventPublisher
910

11+
DownloadingSucceededEventListener->+SiteParserService: page URL
12+
SiteParserService->-DownloadingSucceededEventListener: site parser
1013
DownloadingSucceededEventListener->+SeriesImportService: requestId
1114
SeriesImportService->-DownloadingSucceededEventListener: content of downloaded page
1215
DownloadingSucceededEventListener->+SiteParser: content of downloaded page

src/main/java/ru/mystamps/web/feature/series/importing/event/DownloadingSucceededEventListener.java

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ public class DownloadingSucceededEventListener
5959

6060
@PostConstruct
6161
public void init() {
62+
// TODO: get all parser names from database
6263
log.info("Registered site parsers: {}", siteParsers);
6364

6465
// TODO: remove migration logic after finishing migration
@@ -71,26 +72,17 @@ public void onApplicationEvent(DownloadingSucceeded event) {
7172

7273
log.info("Request #{}: downloading succeeded", requestId);
7374

74-
String content = seriesImportService.getDownloadedContent(requestId);
75-
if (content == null) {
75+
SiteParser parser = siteParserService.findForUrl(event.getUrl());
76+
if (parser == null) {
7677
// TODO: how to handle error? maybe publish UnexpectedErrorEvent?
77-
log.error("Request #{}: could not load a content from database", requestId);
78+
log.error("Request #{}: could not find appropriate parser", requestId);
7879
return;
7980
}
8081

81-
// TODO: replace with siteParserService.findForUrl(url) and update diagrams
82-
String url = event.getUrl();
83-
SiteParser parser = null;
84-
for (SiteParser candidate : siteParsers) {
85-
if (candidate.canParse(url)) {
86-
parser = candidate;
87-
break;
88-
}
89-
}
90-
91-
if (parser == null) {
82+
String content = seriesImportService.getDownloadedContent(requestId);
83+
if (content == null) {
9284
// TODO: how to handle error? maybe publish UnexpectedErrorEvent?
93-
log.error("Request #{}: could not find appropriate parser", requestId);
85+
log.error("Request #{}: could not load a content from database", requestId);
9486
return;
9587
}
9688

src/main/java/ru/mystamps/web/feature/series/importing/extractor/JsoupSiteParser.java

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
import lombok.AccessLevel;
3232
import lombok.Getter;
33+
import lombok.NoArgsConstructor;
3334
import lombok.Setter;
3435

3536
// Getters/setters are being used in unit tests
@@ -41,13 +42,16 @@
4142
// false positive because setField() modifies the fields
4243
"PMD.ImmutableField"
4344
})
45+
// TODO: consider to remove no-arg constructor after removing code for creating parsers from file
46+
@NoArgsConstructor
4447
public class JsoupSiteParser implements SiteParser {
4548
private static final Logger LOG = LoggerFactory.getLogger(JsoupSiteParser.class);
4649

4750
// When you're adding a new field don't forget to also update:
4851
// - JsoupSiteParser.setField()
4952
// - JsoupSiteParser.isFullyInitialized() (optionally)
5053
// - JsoupSiteParserTest.describe()
54+
// - JsoupSiteParser constructor
5155
// - SiteParserConfiguration
5256
private String name;
5357
private String matchedUrl;
@@ -61,6 +65,21 @@ public class JsoupSiteParser implements SiteParser {
6165
private String priceLocator;
6266
private String currencyValue;
6367

68+
// @todo #975 SiteParserServiceImpl: add unit tests for constructor
69+
public JsoupSiteParser(SiteParserConfiguration cfg) {
70+
name = cfg.getName();
71+
matchedUrl = cfg.getMatchedUrl();
72+
categoryLocator = cfg.getCategoryLocator();
73+
countryLocator = cfg.getCountryLocator();
74+
shortDescriptionLocator = cfg.getShortDescriptionLocator();
75+
imageUrlLocator = cfg.getImageUrlLocator();
76+
imageUrlAttribute = cfg.getImageUrlAttribute();
77+
issueDateLocator = cfg.getIssueDateLocator();
78+
sellerLocator = cfg.getSellerLocator();
79+
priceLocator = cfg.getPriceLocator();
80+
currencyValue = cfg.getCurrencyValue();
81+
}
82+
6483
// TODO: remove because it's only needed for migrating configuration from file to database
6584
public SiteParserConfiguration toConfiguration() {
6685
SiteParserConfiguration cfg = new SiteParserConfiguration(name, matchedUrl);
@@ -153,14 +172,6 @@ public boolean isFullyInitialized() {
153172
);
154173
}
155174

156-
@Override
157-
public boolean canParse(String url) {
158-
Validate.validState(url != null, "Site URL must be non-null");
159-
Validate.validState(matchedUrl != null, "Matched URL must be set");
160-
161-
return url.startsWith(matchedUrl);
162-
}
163-
164175
/**
165176
* Parse HTML document to get info about series.
166177
*

src/main/java/ru/mystamps/web/feature/series/importing/extractor/SiteParser.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,5 @@ public interface SiteParser {
2323

2424
boolean isFullyInitialized();
2525

26-
// TODO: remove because it's only needed for migrating configuration from file to database
27-
boolean canParse(String url);
28-
2926
SeriesInfo parse(String htmlPage);
3027
}

src/main/java/ru/mystamps/web/feature/series/importing/extractor/SiteParserService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@
1919

2020
public interface SiteParserService {
2121
void add(SiteParserConfiguration cfg);
22-
SiteParserConfiguration findForUrl(String url);
22+
SiteParser findForUrl(String url);
2323
}

src/main/java/ru/mystamps/web/feature/series/importing/extractor/SiteParserServiceImpl.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public void add(SiteParserConfiguration cfg) {
6262
// @todo #975 SiteParserServiceImpl.findForUrl(): add unit tests
6363
@Override
6464
@Transactional(readOnly = true)
65-
public SiteParserConfiguration findForUrl(String url) {
65+
public SiteParser findForUrl(String url) {
6666
Validate.isTrue(url != null, "Url must be non null");
6767

6868
Integer parserId = siteParserDao.findParserIdForUrl(url);
@@ -71,7 +71,13 @@ public SiteParserConfiguration findForUrl(String url) {
7171
return null;
7272
}
7373

74-
return siteParserDao.findConfigurationForParser(parserId);
74+
SiteParserConfiguration cfg = siteParserDao.findConfigurationForParser(parserId);
75+
if (cfg == null) {
76+
log.warn("Could not find configuration for parser #{}", parserId);
77+
return null;
78+
}
79+
80+
return new JsoupSiteParser(cfg);
7581
}
7682

7783
}

src/main/java/ru/mystamps/web/feature/series/importing/extractor/TimedSiteParser.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,6 @@ public boolean isFullyInitialized() {
4646
return parser.isFullyInitialized();
4747
}
4848

49-
@Override
50-
public boolean canParse(String url) {
51-
return parser.canParse(url);
52-
}
53-
5449
@Override
5550
public SeriesInfo parse(String htmlPage) {
5651
// Why we don't use Spring's StopWatch?

src/test/java/ru/mystamps/web/feature/series/importing/extractor/JsoupSiteParserTest.java

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -286,42 +286,6 @@ public void isFullyInitializedWhenAllMandatoryFieldsAreSet() {
286286
assertThat(msg, parser.isFullyInitialized(), is(true));
287287
}
288288

289-
//
290-
// Tests for canParse()
291-
//
292-
293-
@Test
294-
public void canParseShouldRequireNonNullUrl() {
295-
thrown.expect(IllegalStateException.class);
296-
thrown.expectMessage("Site URL must be non-null");
297-
298-
parser.canParse(null);
299-
}
300-
301-
@Test
302-
public void canParseShouldRequireNonNullMatchedUrl() {
303-
parser.setMatchedUrl(null);
304-
305-
thrown.expect(IllegalStateException.class);
306-
thrown.expectMessage("Matched URL must be set");
307-
308-
parser.canParse(Random.url());
309-
}
310-
311-
@Test
312-
public void canParseWithUnsupportedUrl() {
313-
parser.setMatchedUrl("http://example.org");
314-
315-
assertThat(parser.canParse("http://example.com/test/fail"), is(false));
316-
}
317-
318-
@Test
319-
public void canParseWithSupportedUrl() {
320-
parser.setMatchedUrl("http://example.org");
321-
322-
assertThat(parser.canParse("http://example.org/test/success"), is(true));
323-
}
324-
325289
//
326290
// Tests for parse()
327291
//

0 commit comments

Comments
 (0)