Feature #4509 » 0001-eit-Scrape-genre-from-text-in-OTA-EIT.-4509.patch
data/conf/epggrab/eit/scrape/Bulsatcom_39E | ||
---|---|---|
1 |
{ |
|
2 |
"season_num": [ |
|
3 |
"сезон ([0-9]+)", |
|
4 |
"[, ] сезон ([0-9]+)", |
|
5 |
"сез.? ([0-9]+)", |
|
6 |
"[, ] с. ([0-9]+)", |
|
7 |
"с. ([0-9]+), еп[.]", |
|
8 |
"с. ([0-9]+)", |
|
9 |
"еп. [0-9]+,.*, ([0-9]+), ?сез" |
|
10 |
], |
|
11 |
"episode_num": [ |
|
12 |
"([0-9]+) серия", |
|
13 |
"еп. ([0-9]+)", |
|
14 |
"[, ] ([0-9]+) еп[.]", |
|
15 |
"([0-9]+) еп.[,]", |
|
16 |
"епизод ([0-9]+)", |
|
17 |
"Епизод ([0-9]+)", |
|
18 |
"[, ] ([0-9]+) епизод", |
|
19 |
"([0-9]+) епизод" |
|
20 |
], |
|
21 |
"airdate": [ |
|
22 |
", ([0-9][0-9][0-9][0-9])" |
|
23 |
], |
|
24 |
"genre" : [ { |
|
25 |
"Romance": ["(драма, романтичен)"], |
|
26 |
"Documentary": ["(документален)"] |
|
27 |
} |
|
28 |
] |
|
29 |
} |
data/conf/epggrab/eit/scrape/uk | ||
---|---|---|
24 | 24 |
], |
25 | 25 |
"is_new" : [ |
26 | 26 |
"^(New: )" |
27 |
], |
|
28 |
"genre" : [ { |
|
29 |
"Movie / Drama": ["(Movie|Film)"], |
|
30 |
"Detective / Thriller" : ["(Murder mystery|thriller|sleuth|detective|Miss Marple|Poirot|Agatha Christie|^Columbo)"], |
|
31 |
"Adventure / Western / War" : ["(Action adventure|wartime)", "^(Action|Western)"], |
|
32 |
"Science fiction / Fantasy / Horror" : ["^(Sci-fi)", "^(Horror)", "(superhero fantasy)"], |
|
33 |
"Comedy" : ["(Comedy-drama| comedy|action adventure|^Comedy)"], |
|
34 |
"Romance" : ["(Romcom)"], |
|
35 |
"Adult movie / Drama" : ["(18[+])", "(Adults only)", "(Very strong language|Extreme violence)"], |
|
36 |
"News / Current affairs" : ["(BBC News|ITV News|Sky News)"], |
|
37 |
"News / Weather report": ["(Followed by [Ww]eather|weather forecast|Shipping Forecast)"], |
|
38 |
"Documentary" : ["(Documentary series)"], |
|
39 |
"Talk show" : ["(chats about)"], |
|
40 |
"Sports" : ["^(Snooker)"], |
|
41 |
"Football / Soccer" : ["^(Football|Match of the)", "(NFL|Premier League)"], |
|
42 |
"Team sports (excluding football)" : ["^(Rugby)"], |
|
43 |
"Equestrian" : ["Racing.*(Ascot|Cheltenham)"], |
|
44 |
"Children's / Youth programs" : ["(Family animation|Children's comedy)"], |
|
45 |
"Cartoons / Puppets" : ["(Family animation)"], |
|
46 |
"Music / Ballet / Dance" : ["(Dancing)"], |
|
47 |
"Nature / Animals / Environment" : ["(Attenborough)"], |
|
48 |
"Social / Political issues / Economics" : ["( politics)", "(Mayor's )?Question Time", "House of (Lords|Commons)", "Welsh Assembly|in Parliament" ], |
|
49 |
"Advertisement / Shopping" : ["(Auction|Teleshopping)"], |
|
50 |
"Cooking" : ["(cooks up|whips up)"] |
|
51 |
} |
|
27 | 52 |
] |
28 | 53 |
} |
src/epg.c | ||
---|---|---|
2713 | 2713 |
return (*a == '\0' && *b == '\0'); // end of string(both) |
2714 | 2714 |
} |
2715 | 2715 | |
2716 |
static uint8_t _epg_genre_find_by_name ( const char *name, const char *lang )
|
|
2716 |
uint8_t epg_genre_find_by_name ( const char *name, const char *lang )
|
|
2717 | 2717 |
{ |
2718 | 2718 |
uint8_t a, b; |
2719 | 2719 |
const char *s; |
... | ... | |
2812 | 2812 |
int epg_genre_list_add_by_str ( epg_genre_list_t *list, const char *str, const char *lang ) |
2813 | 2813 |
{ |
2814 | 2814 |
epg_genre_t g; |
2815 |
g.code = _epg_genre_find_by_name(str, lang);
|
|
2815 |
g.code = epg_genre_find_by_name(str, lang); |
|
2816 | 2816 |
return epg_genre_list_add(list, &g); |
2817 | 2817 |
} |
2818 | 2818 |
src/epg.h | ||
---|---|---|
97 | 97 |
/* Search */ |
98 | 98 |
int epg_genre_list_contains |
99 | 99 |
( epg_genre_list_t *list, epg_genre_t *genre, int partial ); |
100 | ||
100 |
uint8_t epg_genre_find_by_name ( const char *name, const char *lang ); |
|
101 | 101 |
/* List all available genres */ |
102 | 102 |
htsmsg_t *epg_genres_list_all ( int major_only, int major_prefix, const char *lang ); |
103 | 103 |
src/epggrab.h | ||
---|---|---|
273 | 273 |
char *scrape_config; ///< Config to use or blank/NULL for default. |
274 | 274 |
int scrape_episode; ///< Scrape season/episode from EIT summary |
275 | 275 |
int scrape_subtitle;///< Scrape subtitle from EIT summary |
276 |
int scrape_genre; ///< Scrape genre from EIT text fields |
|
276 | 277 |
}; |
277 | 278 | |
278 | 279 |
/* |
src/epggrab/module.c | ||
---|---|---|
312 | 312 |
.off = offsetof(epggrab_module_ota_scraper_t, scrape_subtitle), |
313 | 313 |
.group = 2, |
314 | 314 |
}, |
315 |
{ |
|
316 |
.type = PT_BOOL, |
|
317 |
.id = "scrape_genre", |
|
318 |
.name = N_("Scrape Genre"), |
|
319 |
.desc = N_("Enable/disable scraping genre from the programme text fields. " |
|
320 |
"Some broadcasters do not send genre information or " |
|
321 |
"send inadequate genre information. " |
|
322 |
"This allows scraping of genre " |
|
323 |
"from within the broadcast text fields if supported by the " |
|
324 |
"configuration file. " |
|
325 |
"This is less accurate than information a broadcaster could provide " |
|
326 |
"but is useful when the information is not provided or is poor. " |
|
327 |
"Broadcasters that provide DVB genre information do not require " |
|
328 |
"this option to be enabled but may gain additional genres by " |
|
329 |
"enabling it. For example, UK users benefit from enabling this." |
|
330 |
), |
|
331 |
.off = offsetof(epggrab_module_ota_scraper_t, scrape_genre), |
|
332 |
.group = 2, |
|
333 |
}, |
|
315 | 334 |
{} |
316 | 335 |
} |
317 | 336 |
}; |
src/epggrab/module/eit.c | ||
---|---|---|
46 | 46 |
#define EIT_SPEC_NZ_FREEVIEW 2 |
47 | 47 |
#define EIT_SPEC_UK_CABLE_VIRGIN 3 |
48 | 48 | |
49 |
typedef struct eit_genre_regex |
|
50 |
{ |
|
51 |
uint8_t genre; ///< Genre code from epg.c |
|
52 |
eit_pattern_list_t p_genre; ///< Regex across fields to match this genre. |
|
53 |
} eit_genre_regex_t; |
|
49 | 54 | |
50 | 55 |
/* Provider configuration */ |
51 | 56 |
typedef struct eit_module_t |
... | ... | |
56 | 61 |
eit_pattern_list_t p_airdate; ///< Original air date parser |
57 | 62 |
eit_pattern_list_t p_scrape_subtitle;///< Scrape subtitle from summary data |
58 | 63 |
eit_pattern_list_t p_is_new; ///< Is programme new to air |
64 |
int num_eit_genre_regex; |
|
65 |
eit_genre_regex_t *eit_genres; |
|
59 | 66 |
} eit_module_t; |
60 | 67 | |
61 | 68 |
/* ************************************************************************ |
... | ... | |
486 | 493 |
return changed; |
487 | 494 |
} |
488 | 495 | |
496 |
/* Genre is handle differently to others in that we build |
|
497 |
* up lists of genres in the event and then afterwards if the |
|
498 |
* list exists we then see if the entire list has changed. |
|
499 |
*/ |
|
500 |
static void |
|
501 |
_eit_scrape_genre(const char *str, |
|
502 |
eit_module_t *eit_mod, |
|
503 |
eit_event_t *ev) |
|
504 |
{ |
|
505 |
char buffer[2048]; |
|
506 |
int i = 0; |
|
507 | ||
508 |
if (!str || !*str) return; |
|
509 |
if (!eit_mod->num_eit_genre_regex) return; |
|
510 | ||
511 |
for (; i < eit_mod->num_eit_genre_regex; ++i) { |
|
512 |
eit_genre_regex_t *egr = &eit_mod->eit_genres[i]; |
|
513 |
if (eit_pattern_apply_list(buffer, sizeof(buffer), str, &egr->p_genre)) { |
|
514 |
/* Free'd by caller */ |
|
515 |
if (!ev->genre) ev->genre = calloc(1, sizeof(epg_genre_list_t)); |
|
516 |
epg_genre_list_add_by_eit(ev->genre, egr->genre); |
|
517 |
} |
|
518 |
} |
|
519 |
} |
|
520 | ||
489 | 521 | |
490 | 522 |
/* ************************************************************************ |
491 | 523 |
* EIT Event |
... | ... | |
683 | 715 |
eit_mod, &en, ©right_year, &is_new); |
684 | 716 |
} |
685 | 717 | |
718 |
if (eit_mod->scrape_genre) { |
|
719 |
/* Genre scraping builds up a list in ev.genre so has no |
|
720 |
* "scraped" value here to check. |
|
721 |
*/ |
|
722 |
if (ev.title) |
|
723 |
_eit_scrape_genre(lang_str_get(ev.title, ev.default_charset), |
|
724 |
eit_mod, &ev); |
|
725 |
if (ev.desc) |
|
726 |
_eit_scrape_genre(lang_str_get(ev.desc, ev.default_charset), |
|
727 |
eit_mod, &ev); |
|
728 | ||
729 |
if (ev.summary) |
|
730 |
_eit_scrape_genre(lang_str_get(ev.summary, ev.default_charset), |
|
731 |
eit_mod, &ev); |
|
732 |
} |
|
733 | ||
686 | 734 |
/* Update Episode */ |
687 | 735 |
if (ee) { |
688 | 736 |
*save |= epg_broadcast_set_episode(ebc, ee, &changes2); |
... | ... | |
1080 | 1128 |
eit_pattern_free_list(&mod->p_airdate); |
1081 | 1129 |
eit_pattern_free_list(&mod->p_scrape_subtitle); |
1082 | 1130 |
eit_pattern_free_list(&mod->p_is_new); |
1131 |
mod->num_eit_genre_regex = 0; |
|
1132 |
free(mod->eit_genres); |
|
1133 |
} |
|
1134 | ||
1135 |
/// Convert a message containing an array of genre names to regex matches |
|
1136 |
/// in to internal format for EPG mapping. |
|
1137 |
/// For example: [ { "Documentary" : ["(Documentary series)"] }] becomes |
|
1138 |
/// epg_genre 23 --> regex |
|
1139 |
static void _eit_scrape_load_one_genre_regex(htsmsg_t *m, eit_module_t *mod) |
|
1140 |
{ |
|
1141 |
htsmsg_field_t *f; |
|
1142 |
if (!m) |
|
1143 |
return; |
|
1144 |
HTSMSG_FOREACH(f, m) { |
|
1145 |
htsmsg_t *value = htsmsg_get_list_by_field(f); |
|
1146 |
if (value && f->hmf_name && *f->hmf_name) { |
|
1147 |
const uint8_t genre_int = epg_genre_find_by_name(f->hmf_name, NULL); |
|
1148 |
if (genre_int) { |
|
1149 |
++mod->num_eit_genre_regex; |
|
1150 |
mod->eit_genres = realloc(mod->eit_genres, |
|
1151 |
mod->num_eit_genre_regex * sizeof(eit_genre_regex_t)); |
|
1152 |
eit_genre_regex_t *egr = &mod->eit_genres[mod->num_eit_genre_regex - 1]; |
|
1153 |
egr->genre = genre_int; |
|
1154 |
eit_pattern_compile_list(&egr->p_genre, value); |
|
1155 |
tvhinfo(LS_TBL_EIT, "module %s - Scrape \"%s\" to genre 0x%x", mod->id, f->hmf_name, genre_int); |
|
1156 |
} |
|
1157 |
} |
|
1158 |
} |
|
1159 |
} |
|
1160 | ||
1161 | ||
1162 |
/* Walk the "genre" list from a scraper configuration and load every entry.
 * Each field is fetched as a map and handed to
 * _eit_scrape_load_one_genre_regex() together with the module.
 * A NULL message is ignored.
 */
static void _eit_scrape_load_genre_regex(htsmsg_t *m, eit_module_t *mod)
{
  htsmsg_field_t *entry;

  if (m == NULL)
    return;

  HTSMSG_FOREACH(entry, m) {
    _eit_scrape_load_one_genre_regex(htsmsg_get_map_by_field(entry), mod);
  }
}
1084 | 1172 | |
1085 | 1173 |
static int _eit_scrape_load_one ( htsmsg_t *m, eit_module_t* mod ) |
... | ... | |
1095 | 1183 |
eit_pattern_compile_list(&mod->p_scrape_subtitle, htsmsg_get_list(m, "scrape_subtitle")); |
1096 | 1184 |
} |
1097 | 1185 | |
1186 |
if (mod->scrape_genre) { |
|
1187 |
_eit_scrape_load_genre_regex(htsmsg_get_list(m, "genre"), mod); |
|
1188 |
} |
|
1098 | 1189 |
return 1; |
1099 | 1190 |
} |
1100 | 1191 | |
1101 | 1192 |
static void _eit_module_load_config(eit_module_t *mod) |
1102 | 1193 |
{ |
1103 |
if (!mod->scrape_episode && !mod->scrape_subtitle) { |
|
1194 |
if (!mod->scrape_episode && !mod->scrape_subtitle && !mod->scrape_genre) {
|
|
1104 | 1195 |
tvhinfo(LS_TBL_EIT, "module %s - scraper disabled by config", mod->id); |
1105 | 1196 |
return; |
1106 | 1197 |
} |
src/webui/static/app/tvheadend.js | ||
---|---|---|
166 | 166 |
"15" : "couch_and_lamp", |
167 | 167 |
"16" : "red_heart", |
168 | 168 |
"18" : "no_one_under_eighteen_symbol", |
169 |
"21" : "sun_behind_cloud", |
|
169 | 170 |
"24" : "speaking_head_in_silhouette", |
170 | 171 |
"33" : "speaking_head_in_silhouette", |
171 | 172 |
"43" : "soccer_ball", |
172 |
- |