Project

General

Profile

Feature #4287 » also_scrape_title_and_desc.patch

Em Smith, 2017-09-09 10:23

View differences:

src/epggrab/module/eit.c
429 429
  return 0;
430 430
}
431 431

  
432
/* Scrape episode data from the broadcast data.
433
 * @param text - string from broadcaster to search.
434
 * @param eit_mod - our module with regex to use.
435
 * @param en - [out] episode data
436
 * @param first_aired - [out] airdate
437
 * @return Bitmask of changed fields.
438
 */
439
static uint32_t _eit_scrape_episode(const char *str,
440
                                    eit_module_t *eit_mod,
441
                                    epg_episode_num_t *en,
442
                                    time_t *first_aired)
443
{
444
  uint32_t changed = 0;
445
  /* search for season number */
446
  char buffer[2048];
447
  if (eit_pattern_apply_list(buffer, sizeof(buffer), str, &eit_mod->p_snum))
448
    if ((en->s_num = atoi(buffer))) {
449
      tvhtrace(LS_TBL_EIT,"  extract season number %d using %s", en->s_num, eit_mod->id);
450
      changed |= EPG_CHANGED_EPISODE;
451
    }
452

  
453
  /* ...for episode number */
454
  if (eit_pattern_apply_list(buffer, sizeof(buffer), str, &eit_mod->p_enum))
455
    if ((en->e_num = atoi(buffer))) {
456
      tvhtrace(LS_TBL_EIT,"  extract episode number %d using %s", en->e_num, eit_mod->id);
457
      changed |= EPG_CHANGED_EPISODE;
458
    }
459

  
460
  /* Extract original air date year */
461
  if (eit_pattern_apply_list(buffer, sizeof(buffer), str, &eit_mod->p_airdate)) {
462
    if (strlen(buffer) == 4) {
463
      /* Year component only */
464
      const int year = atoi(buffer);
465
      if (year) {
466
        struct tm airdate;
467
        memset(&airdate, 0, sizeof(airdate));
468
        airdate.tm_year = year - 1900;
469
        /* Remaining fields in airdate can all remain at zero but day
470
         * of month is one-based
471
         */
472
        airdate.tm_mday = 1;
473
        *first_aired = mktime(&airdate);
474
        changed |= EPG_CHANGED_FIRST_AIRED;
475
      }
476
    }
477
  }
478
  return changed;
479
}
480

  
432 481

  
433 482
/* ************************************************************************
434 483
 * EIT Event
......
600 649
    ee = epg_episode_find_by_broadcast(ebc, mod, 1, save, &changes4);
601 650
  }
602 651

  
652
  /* Scrape episode from within broadcast data */
603 653
  epg_episode_num_t en;
604 654
  memset(&en, 0, sizeof(en));
605
  /* We use a separate "first_aired_set" variable otherwise
606
   * we don't know the difference between a null date and a
607
   * programme that happened to start in 1974.
608
   */
609 655
  time_t first_aired = 0;
610
  int    first_aired_set = 0;
611

  
612
  if (ev.summary) {
613
    /* search for season number */
614
    char buffer[2048];
615
    const char* summary = lang_str_get(ev.summary, ev.default_charset);
616
    if (eit_pattern_apply_list(buffer, sizeof(buffer), summary, &eit_mod->p_snum))
617
      if ((en.s_num = atoi(buffer)))
618
        tvhtrace(LS_TBL_EIT,"  extract season number %d using %s", en.s_num, mod->id);
619
    /* ...for episode number */
620
    if (eit_pattern_apply_list(buffer, sizeof(buffer), summary, &eit_mod->p_enum))
621
      if ((en.e_num = atoi(buffer)))
622
        tvhtrace(LS_TBL_EIT,"  extract episode number %d using %s", en.e_num, mod->id);
623

  
624
    /* Extract original air date year */
625
    if (eit_pattern_apply_list(buffer, sizeof(buffer), summary, &eit_mod->p_airdate)) {
626
      if (strlen(buffer) == 4) {
627
        /* Year component only */
628
        const int year = atoi(buffer);
629
        if (year) {
630
          struct tm airdate;
631
          memset(&airdate, 0, sizeof(airdate));
632
          airdate.tm_year = year - 1900;
633
          /* Remaining fields in airdate can all remain at zero but day
634
           * of month is one-based
635
           */
636
          airdate.tm_mday = 1;
637
          first_aired = mktime(&airdate);
638
          first_aired_set = 1;
639
        }
640
      }
641
    }
642
  }
656
  uint32_t scraped = 0;
657

  
658
  /* We search across all the main fields using the same regex and
659
   * merge the results.
660
   */
661
  if (ev.title)
662
    scraped |=  _eit_scrape_episode(lang_str_get(ev.title, ev.default_charset),
663
                                    eit_mod, &en, &first_aired);
664
  if (ev.desc)
665
    scraped |=  _eit_scrape_episode(lang_str_get(ev.desc, ev.default_charset),
666
                                    eit_mod, &en, &first_aired);
667

  
668
  if (ev.summary)
669
    scraped |= _eit_scrape_episode(lang_str_get(ev.summary, ev.default_charset),
670
                                   eit_mod, &en, &first_aired);
671

  
643 672

  
644 673
  /* Update Episode */
645 674
  if (ee) {
......
660 689
    /* save any found episode number */
661 690
    if (en.s_num || en.e_num || en.p_num)
662 691
      *save |= epg_episode_set_epnum(ee, &en, &changes4);
663
    if (first_aired_set)
692
    if (scraped & EPG_CHANGED_FIRST_AIRED)
664 693
      *save |= epg_episode_set_first_aired(ee, first_aired, &changes4);
665 694

  
666 695
    *save |= epg_episode_change_finish(ee, changes4, 0);
(1-1/4)