Project

General

Profile

Feature #4421 » telxcc.c

Jaroslav Kysela, 2017-06-30 16:30

 
1
/*!
2
(c) 2011-2013 Forers, s. r. o.: telxcc
3

    
4
telxcc conforms to ETSI 300 706 Presentation Level 1.5: Presentation Level 1 defines the basic Teletext page,
5
characterised by the use of spacing attributes only and a limited alphanumeric and mosaics repertoire.
6
Presentation Level 1.5 decoder responds as Level 1 but the character repertoire is extended via packets X/26.
7
Selection of national option sub-sets related features from Presentation Level 2.5 feature set have been implemented, too.
8
(X/28/0 Format 1, X/28/4, M/29/0 and M/29/4 packets)
9

    
10
Further documentation:
11
ETSI TS 101 154 V1.9.1 (2009-09), Technical Specification
12
  Digital Video Broadcasting (DVB); Specification for the use of Video and Audio Coding in Broadcasting Applications based on the MPEG-2 Transport Stream
13
ETSI EN 300 231 V1.3.1 (2003-04), European Standard (Telecommunications series)
14
  Television systems; Specification of the domestic video Programme Delivery Control system (PDC)
15
ETSI EN 300 472 V1.3.1 (2003-05), European Standard (Telecommunications series)
16
  Digital Video Broadcasting (DVB); Specification for conveying ITU-R System B Teletext in DVB bitstreams
17
ETSI EN 301 775 V1.2.1 (2003-05), European Standard (Telecommunications series)
18
  Digital Video Broadcasting (DVB); Specification for the carriage of Vertical Blanking Information (VBI) data in DVB bitstreams
19
ETS 300 706 (May 1997)
20
  Enhanced Teletext Specification
21
ETS 300 708 (March 1997)
22
  Television systems; Data transmission within Teletext
23
ISO/IEC STANDARD 13818-1 Second edition (2000-12-01)
24
  Information technology — Generic coding of moving pictures and associated audio information: Systems
25
ISO/IEC STANDARD 6937 Third edition (2001-12-15)
26
  Information technology — Coded graphic character set for text communication — Latin alphabet
27
Werner Brückner -- Teletext in digital television
28
*/
29

    
30

    
31
#include "lib_ccx.h"
32
#include "ccx_common_option.h"
33
#include "hamming.h"
34
#include "teletext.h"
35
#include <signal.h>
36
#include "activity.h"
37
#include "ccx_encoders_helpers.h"
38

    
39
#ifdef __MINGW32__
40
// switch stdin and all normal files into binary mode -- needed for Windows
41
#include <fcntl.h>
42
int _CRT_fmode = _O_BINARY;
43

    
44
// for better UX in Windows we want to detect that app is not run by "double-clicking" in Explorer
45
#define WIN32_LEAN_AND_MEAN
46
#define _WIN32_WINNT 0x0502
47
#define _WIN32_IE 0x0400
48
#include <windows.h>
49
#include <commctrl.h>
50
#endif
51

    
52
// File-scope global with external linkage, zero-initialised.
// NOTE(review): not referenced in the portion of the file reviewed here -- confirm it is still used.
long long int last_pes_pts = 0; // PTS of last PES packet (debug purposes)
53

    
54
// One decoded teletext screen together with the interval during which it is shown.
typedef struct {
	uint64_t show_timestamp;         // moment the page becomes visible (ms)
	uint64_t hide_timestamp;         // moment the page is removed again (ms)
	uint16_t text[25][40];           // character cells, 25 rows x 40 columns, one wide char each
	uint8_t g2_char_present[25][40]; // per cell: 1 = taken from the supplementary G2 set, 0 = not
	uint8_t tainted;                 // 1 = text[] contains any data
} teletext_page_t;
61

    
62
// Application state flags; each one guards a notice that must be printed only once.
struct s_states {
	uint8_t programme_info_processed; // non-zero once the programme info notice was handled
	uint8_t pts_initialized;          // non-zero once the PTS notice was handled
};
67

    
68
// Teletext transmission mode as stored in TeletextCtx.transmission_mode.
typedef enum {
	TRANSMISSION_MODE_PARALLEL = 0,
	TRANSMISSION_MODE_SERIAL   = 1
} transmission_mode_t;
73

    
74
struct TeletextCtx
75
{
76
	short int seen_sub_page[MAX_TLT_PAGES];
77
	uint8_t verbose : 1; // should telxcc be verbose?
78
	uint16_t page; // teletext page containing cc we want to filter
79
	uint16_t tid; // 13-bit packet ID for teletext stream
80
	double offset; // time offset in seconds
81
	uint8_t bom : 1; // print UTF-8 BOM characters at the beginning of output
82
	uint8_t nonempty : 1; // produce at least one (dummy) frame
83
	// uint8_t se_mode : 1; // search engine compatible mode => Uses CCExtractor's write_format
84
	// uint64_t utc_refvalue; // UTC referential value => Moved to ccx_decoders_common, so can be used for other decoders (608/xds) too
85
	uint16_t user_page; // Page selected by user, which MIGHT be different to 'page' depending on autodetection stuff
86
	int levdistmincnt, levdistmaxpct; // Means 2 fails or less is "the same", 10% or less is also "the same"
87
	struct ccx_boundary_time extraction_start, extraction_end; // Segment we actually process
88
	enum ccx_output_format write_format; // 0=Raw, 1=srt, 2=SMI
89
	int gui_mode_reports; // If 1, output in stderr progress updates so the GUI can grab them
90
	enum ccx_output_date_format date_format;
91
	int noautotimeref; // Do NOT set time automatically?
92
	unsigned send_to_srv;
93
	char millis_separator;
94
	uint32_t global_timestamp;
95

    
96
	// Current and previous page buffers. This is the output written to file when
97
	// the time comes.
98
	teletext_page_t page_buffer;
99
	char *page_buffer_prev;
100
	char *page_buffer_cur;
101
	unsigned page_buffer_cur_size;
102
	unsigned page_buffer_cur_used;
103
	unsigned page_buffer_prev_size;
104
	unsigned page_buffer_prev_used;
105
	// Current and previous page compare strings. This is plain text (no colors,
106
	// tags, etc) in UCS2 (fixed length), so we can compare easily.
107
	uint64_t *ucs2_buffer_prev;
108
	uint64_t *ucs2_buffer_cur;
109
	unsigned ucs2_buffer_cur_size;
110
	unsigned ucs2_buffer_cur_used;
111
	unsigned ucs2_buffer_prev_size;
112
	unsigned ucs2_buffer_prev_used;
113
	// Buffer timestamp
114
	uint64_t prev_hide_timestamp;
115
	uint64_t prev_show_timestamp;
116
	// subtitle type pages bitmap, 2048 bits = 2048 possible pages in teletext (excl. subpages)
117
	uint8_t cc_map[256];
118
	// last timestamp computed
119
	uint64_t last_timestamp;
120
	struct s_states states;
121
	// FYI, packet counter
122
	uint32_t tlt_packet_counter;
123
	// teletext transmission mode
124
	transmission_mode_t transmission_mode;
125
	// flag indicating if incoming data should be processed or ignored
126
	uint8_t receiving_data;
127

    
128
	uint8_t using_pts;
129
	int64_t delta;
130
	uint32_t t0;
131

    
132
	int sentence_cap;//Set to 1 if -sc is passed
133
	int new_sentence;
134
	int splitbysentence;
135

    
136
};
137
// data_unit_id values distinguishing the kinds of data unit handled by the decoder.
typedef enum {
	DATA_UNIT_EBU_TELETEXT_NONSUBTITLE = 0x02,
	DATA_UNIT_EBU_TELETEXT_SUBTITLE    = 0x03,
	DATA_UNIT_EBU_TELETEXT_INVERTED    = 0x0c,
	DATA_UNIT_VPS                      = 0xc3,
	DATA_UNIT_CLOSED_CAPTIONS          = 0xc5
} data_unit_t;
145

    
146
// HTML colour codes indexed by teletext colour number (0 = black ... 7 = white).
static const char* TTXT_COLOURS[8] = {
	"#000000", // 0 black
	"#ff0000", // 1 red
	"#00ff00", // 2 green
	"#ffff00", // 3 yellow
	"#0000ff", // 4 blue
	"#ff00ff", // 5 magenta
	"#00ffff", // 6 cyan
	"#ffffff"  // 7 white
};
150

    
151
// Capacity of TeletextCtx.seen_sub_page[].
// NOTE(review): struct TeletextCtx is declared earlier in this file and already uses
// MAX_TLT_PAGES, so one of the included headers presumably defines it as well --
// confirm, and keep only one definition.
#define MAX_TLT_PAGES 1000
152

    
153

    
154
// Raw teletext packet as it appears in the stream buffer. Packed to 1-byte
// alignment because buffers are cast directly to this struct.
// FIXME: remove explicit type conversion from buffer to structs
#pragma pack(push, 1)
typedef struct {
	uint8_t _clock_in;     // clock run in
	uint8_t _framing_code; // framing code, not needed; ETSI 300 706: const 0xe4
	uint8_t address[2];    // two address bytes
	uint8_t data[40];      // 40 payload bytes
} teletext_packet_payload_t;
#pragma pack(pop)
165

    
166
// application config global variable
167
// Zero-initialised here. The functions in this file read it directly
// (verbose, page, levdistmaxpct, levdistmincnt, extraction_start/extraction_end).
struct ccx_s_teletext_config tlt_config = { 0};
168

    
169
// macro -- output only when increased verbosity was turned on
170
// Usage: VERBOSE_ONLY statement;
// The macro expands to a bare `if (...)`, so the statement that follows becomes its body.
// Do not follow that statement with an `else`: it would bind to this hidden `if`.
#define VERBOSE_ONLY if (tlt_config.verbose == YES)
171

    
172
// current charset (charset can be -- and always is -- changed during transmission)
173
struct s_primary_charset {
174
	uint8_t current;
175
	uint8_t g0_m29;
176
	uint8_t g0_x28;
177
} primary_charset = {
178
	0x00, UNDEFINED, UNDEFINED
179
};
180

    
181
// HTML entities used in colour mode to replace characters that are unsafe inside tags.
struct {
	uint16_t character; // character to be replaced
	const char *entity; // replacement text
} const ENTITIES[] = {
	{ '<', "&lt;"  },
	{ '>', "&gt;"  },
	{ '&', "&amp;" }
};
190

    
191
// Number of elements in a true array (NOT a pointer or an array parameter).
// The argument is parenthesized so that any array-valued expression can be passed.
#define array_length(a) (sizeof(a) / sizeof((a)[0]))
192

    
193
// Extracts the magazine number (bits 8-11) from a teletext page number.
// The argument is parenthesized so expressions such as MAGAZINE(a | b) expand correctly.
#define MAGAZINE(p) (((p) >> 8) & 0xf)
195

    
196
// Extracts the page number (low 8 bits) from a teletext page number.
// The argument is parenthesized so expressions such as PAGE(a | b) expand correctly.
#define PAGE(p) ((p) & 0xff)
198

    
199
// Identifiers of the G0 primary character sets. The first five values are used
// as the row index into G0[]; that table has no rows for ARABIC and HEBREW.
typedef enum {
	LATIN     = 0,
	CYRILLIC1 = 1,
	CYRILLIC2 = 2,
	CYRILLIC3 = 3,
	GREEK     = 4,
	ARABIC    = 5,
	HEBREW    = 6
} g0_charsets_type;
209

    
210
// G0 set selected by set_g0_charset() and used by telx_to_ucs2() as the row index into G0[].
// As a file-scope object it starts out zero-initialised, i.e. LATIN.
g0_charsets_type default_g0_charset;
211

    
212
// Note: All characters are encoded in UCS-2
213

    
214
// --- G0 ----------------------------------------------------------------------
215

    
216
// G0 primary character sets, one row per g0_charsets_type value (LATIN .. GREEK).
// Each row maps teletext codes 0x20..0x7f (index = code - 0x20) to UCS-2.
// The table is writable on purpose: remap_g0_charset() patches the LATIN row
// with the active national option sub-set.
uint16_t G0[5][96] = {
	{ // Latin G0 Primary Set
		0x0020, 0x0021, 0x0022, 0x00a3, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
		0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
		0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
		0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x00ab, 0x00bd, 0x00bb, 0x005e, 0x0023,
		0x002d, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
		0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x00bc, 0x00a6, 0x00be, 0x00f7, 0x007f
	},
	{ // Cyrillic G0 Primary Set - Option 1 - Serbian/Croatian
		// NOTE(review): several entries in the last two lines repeat upper-case code points
		// (0x042x) where lower-case forms (0x045x) would be expected -- verify against ETS 300 706.
		0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x044b, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
		// index 0x12 is the digit '2' (U+0032); it used to read 0x3200, a transposed typo
		0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
		0x0427, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0408, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
		0x041f, 0x040c, 0x0420, 0x0421, 0x0422, 0x0423, 0x0412, 0x0403, 0x0409, 0x040a, 0x0417, 0x040b, 0x0416, 0x0402, 0x0428, 0x040f,
		0x0447, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0428, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
		0x043f, 0x042c, 0x0440, 0x0441, 0x0442, 0x0443, 0x0432, 0x0423, 0x0429, 0x042a, 0x0437, 0x042b, 0x0436, 0x0422, 0x0448, 0x042f
	},
	{ // Cyrillic G0 Primary Set - Option 2 - Russian/Bulgarian
		0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x044b, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
		0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
		0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
		0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042c, 0x042a, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042b,
		0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
		0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044c, 0x044a, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044b
	},
	{ // Cyrillic G0 Primary Set - Option 3 - Ukrainian
		0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x00ef, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
		0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
		0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
		0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042c, 0x0049, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x00cf,
		0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
		0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044c, 0x0069, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x00ff
	},
	{ // Greek G0 Primary Set
		0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
		0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
		0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
		0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
		0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
		0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x03cf
	}
	// Arabic and Hebrew G0 primary sets are not implemented.
};
263

    
264
// Indices into G0[LATIN] that receive the 13 characters of a national option sub-set,
// in the same order as G0_LATIN_NATIONAL_SUBSETS[].characters.
const uint8_t G0_LATIN_NATIONAL_SUBSETS_POSITIONS[13] = {
	0x03, 0x04,
	0x20,
	0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
	0x40,
	0x5b, 0x5c, 0x5d, 0x5e
};
268

    
269
// ETS 300 706, chapter 15.2, table 32: Function of Default G0 and G2 Character Set Designation
270
// and National Option Selection bits in packets X/28/0 Format 1, X/28/4, M/29/0 and M/29/4
271

    
272
// Latin National Option Sub-sets.
// characters[j] is written to G0[LATIN][G0_LATIN_NATIONAL_SUBSETS_POSITIONS[j]],
// i.e. to teletext codes 0x23 0x24 0x40 0x5b-0x5f 0x60 0x7b-0x7e in that order.
// The array is declared with 14 slots but only 13 are populated; the last one
// stays zero-initialised and is not referenced by G0_LATIN_NATIONAL_SUBSETS_MAP.
struct {
	const char *language;
	uint16_t characters[13];
} const G0_LATIN_NATIONAL_SUBSETS[14] = {
	{ // 0
		"English",
		{ 0x00a3, 0x0024, 0x0040, 0x00ab, 0x00bd, 0x00bb, 0x005e, 0x0023, 0x002d, 0x00bc, 0x00a6, 0x00be, 0x00f7 }
	},
	{ // 1
		"French",
		{ 0x00e9, 0x00ef, 0x00e0, 0x00eb, 0x00ea, 0x00f9, 0x00ee, 0x0023, 0x00e8, 0x00e2, 0x00f4, 0x00fb, 0x00e7 }
	},
	{ // 2
		"Swedish, Finnish, Hungarian",
		{ 0x0023, 0x00a4, 0x00c9, 0x00c4, 0x00d6, 0x00c5, 0x00dc, 0x005f, 0x00e9, 0x00e4, 0x00f6, 0x00e5, 0x00fc }
	},
	{ // 3
		"Czech, Slovak",
		{ 0x0023, 0x016f, 0x010d, 0x0165, 0x017e, 0x00fd, 0x00ed, 0x0159, 0x00e9, 0x00e1, 0x011b, 0x00fa, 0x0161 }
	},
	{ // 4
		"German",
		{ 0x0023, 0x0024, 0x00a7, 0x00c4, 0x00d6, 0x00dc, 0x005e, 0x005f, 0x00b0, 0x00e4, 0x00f6, 0x00fc, 0x00df }
	},
	{ // 5
		"Portuguese, Spanish",
		{ 0x00e7, 0x0024, 0x00a1, 0x00e1, 0x00e9, 0x00ed, 0x00f3, 0x00fa, 0x00bf, 0x00fc, 0x00f1, 0x00e8, 0x00e0 }
	},
	{ // 6
		"Italian",
		{ 0x00a3, 0x0024, 0x00e9, 0x00b0, 0x00e7, 0x00bb, 0x005e, 0x0023, 0x00f9, 0x00e0, 0x00f2, 0x00e8, 0x00ec }
	},
	{ // 7
		"Rumanian",
		{ 0x0023, 0x00a4, 0x0162, 0x00c2, 0x015e, 0x0102, 0x00ce, 0x0131, 0x0163, 0x00e2, 0x015f, 0x0103, 0x00ee }
	},
	{ // 8
		"Polish",
		{ 0x0023, 0x0144, 0x0105, 0x017b, 0x015a, 0x0141, 0x0107, 0x00f3, 0x0119, 0x017c, 0x015b, 0x0142, 0x017a }
	},
	{ // 9
		"Turkish",
		{ 0x0054, 0x011f, 0x0130, 0x015e, 0x00d6, 0x00c7, 0x00dc, 0x011e, 0x0131, 0x015f, 0x00f6, 0x00e7, 0x00fc }
	},
	{ // a
		"Serbian, Croatian, Slovenian",
		{ 0x0023, 0x00cb, 0x010c, 0x0106, 0x017d, 0x0110, 0x0160, 0x00eb, 0x010d, 0x0107, 0x017e, 0x0111, 0x0161 }
	},
	{ // b
		"Estonian",
		// characters[5] (code 0x5d) is upper-case Z-caron U+017D; it used to duplicate
		// the lower-case U+017E that belongs at characters[11] (code 0x7d).
		{ 0x0023, 0x00f5, 0x0160, 0x00c4, 0x00d6, 0x017d, 0x00dc, 0x00d5, 0x0161, 0x00e4, 0x00f6, 0x017e, 0x00fc }
	},
	{ // c
		"Lettish, Lithuanian",
		{ 0x0023, 0x0024, 0x0160, 0x0117, 0x0119, 0x017d, 0x010d, 0x016b, 0x0161, 0x0105, 0x0173, 0x017e, 0x012f }
	}
};
330

    
331
// Maps a national option id to an index into G0_LATIN_NATIONAL_SUBSETS;
// 0xff marks an id for which no sub-set is implemented.
// Laid out as seven groups of eight: group = id >> 3, column = id & 0x7.
const uint8_t G0_LATIN_NATIONAL_SUBSETS_MAP[56] = {
	/* 0x00-0x07 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	/* 0x08-0x0f */ 0x08, 0x01, 0x02, 0x03, 0x04, 0xff, 0x06, 0xff,
	/* 0x10-0x17 */ 0x00, 0x01, 0x02, 0x09, 0x04, 0x05, 0x06, 0xff,
	/* 0x18-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0x07,
	/* 0x20-0x27 */ 0xff, 0xff, 0x0b, 0x03, 0x04, 0xff, 0x0c, 0xff,
	/* 0x28-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x30-0x37 */ 0xff, 0xff, 0xff, 0x09, 0xff, 0xff, 0xff, 0xff
};
341

    
342
// --- G2 ----------------------------------------------------------------------
343

    
344
// G2 supplementary character sets in UCS-2 (index = teletext code - 0x20).
// Only the Latin set is present; Cyrillic, Greek and Arabic G2 sets are not implemented.
const uint16_t G2[1][96] = {
	{ // Latin G2 Supplementary Set
		/* 0x20 */ 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x0024, 0x00a5, 0x0023, 0x00a7,
		/* 0x28 */ 0x00a4, 0x2018, 0x201c, 0x00ab, 0x2190, 0x2191, 0x2192, 0x2193,
		/* 0x30 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00d7, 0x00b5, 0x00b6, 0x00b7,
		/* 0x38 */ 0x00f7, 0x2019, 0x201d, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
		/* 0x40 */ 0x0020, 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0306, 0x0307,
		/* 0x48 */ 0x0308, 0x0000, 0x030a, 0x0327, 0x005f, 0x030b, 0x0328, 0x030c,
		/* 0x50 */ 0x2015, 0x00b9, 0x00ae, 0x00a9, 0x2122, 0x266a, 0x20ac, 0x2030,
		/* 0x58 */ 0x03b1, 0x0000, 0x0000, 0x0000, 0x215b, 0x215c, 0x215d, 0x215e,
		/* 0x60 */ 0x03a9, 0x00c6, 0x0110, 0x00aa, 0x0126, 0x0000, 0x0132, 0x013f,
		/* 0x68 */ 0x0141, 0x00d8, 0x0152, 0x00ba, 0x00de, 0x0166, 0x014a, 0x0149,
		/* 0x70 */ 0x0138, 0x00e6, 0x0111, 0x00f0, 0x0127, 0x0131, 0x0133, 0x0140,
		/* 0x78 */ 0x0142, 0x00f8, 0x0153, 0x00df, 0x00fe, 0x0167, 0x014b, 0x0020
	}
};
360

    
361
// Pre-composed accented letters in UCS-2.
// Row    = diacritical mark (rows 8 and 11 are unused and stay all zero).
// Column = base letter: 0..25 for 'A'..'Z', 26..51 for 'a'..'z'.
// An entry of 0 means that no pre-composed character is listed for that combination.
// Only the non-zero entries are spelled out; everything else is zero-initialised.
const uint16_t G2_ACCENTS[15][52] = {
	[0] = { // grave: A E I O U / a e i o u
		[0] = 0x00c0, [4] = 0x00c8, [8] = 0x00cc, [14] = 0x00d2, [20] = 0x00d9,
		[26] = 0x00e0, [30] = 0x00e8, [34] = 0x00ec, [40] = 0x00f2, [46] = 0x00f9
	},
	[1] = { // acute: A C E I L N O R S U Y Z / a c e g i l n o r s u y z
		[0] = 0x00c1, [2] = 0x0106, [4] = 0x00c9, [8] = 0x00cd, [11] = 0x0139, [13] = 0x0143,
		[14] = 0x00d3, [17] = 0x0154, [18] = 0x015a, [20] = 0x00da, [24] = 0x00dd, [25] = 0x0179,
		[26] = 0x00e1, [28] = 0x0107, [30] = 0x00e9, [32] = 0x0123, [34] = 0x00ed, [37] = 0x013a,
		[39] = 0x0144, [40] = 0x00f3, [43] = 0x0155, [44] = 0x015b, [46] = 0x00fa, [50] = 0x00fd,
		[51] = 0x017a
	},
	[2] = { // circumflex: A C E G H I J O S U W Y / a c e g h i j o s u w y
		[0] = 0x00c2, [2] = 0x0108, [4] = 0x00ca, [6] = 0x011c, [7] = 0x0124, [8] = 0x00ce,
		[9] = 0x0134, [14] = 0x00d4, [18] = 0x015c, [20] = 0x00db, [22] = 0x0174, [24] = 0x0176,
		[26] = 0x00e2, [28] = 0x0109, [30] = 0x00ea, [32] = 0x011d, [33] = 0x0125, [34] = 0x00ee,
		[35] = 0x0135, [40] = 0x00f4, [44] = 0x015d, [46] = 0x00fb, [48] = 0x0175, [50] = 0x0177
	},
	[3] = { // tilde: A I N O U / a i n o u
		[0] = 0x00c3, [8] = 0x0128, [13] = 0x00d1, [14] = 0x00d5, [20] = 0x0168,
		[26] = 0x00e3, [34] = 0x0129, [39] = 0x00f1, [40] = 0x00f5, [46] = 0x0169
	},
	[4] = { // macron: A E I O U / a e i o u
		[0] = 0x0100, [4] = 0x0112, [8] = 0x012a, [14] = 0x014c, [20] = 0x016a,
		[26] = 0x0101, [30] = 0x0113, [34] = 0x012b, [40] = 0x014d, [46] = 0x016b
	},
	[5] = { // breve: A G U / a g u
		[0] = 0x0102, [6] = 0x011e, [20] = 0x016c,
		[26] = 0x0103, [32] = 0x011f, [46] = 0x016d
	},
	[6] = { // dot: C E G I Z / c e g z
		[2] = 0x010a, [4] = 0x0116, [6] = 0x0120, [8] = 0x0130, [25] = 0x017b,
		[28] = 0x010b, [30] = 0x0117, [32] = 0x0121, [51] = 0x017c
	},
	[7] = { // umlaut: A E I O U Y / a e i o u y
		[0] = 0x00c4, [4] = 0x00cb, [8] = 0x00cf, [14] = 0x00d6, [20] = 0x00dc, [24] = 0x0178,
		[26] = 0x00e4, [30] = 0x00eb, [34] = 0x00ef, [40] = 0x00f6, [46] = 0x00fc, [50] = 0x00ff
	},
	[8] = { 0 },
	[9] = { // ring: A U / a u
		[0] = 0x00c5, [20] = 0x016e,
		[26] = 0x00e5, [46] = 0x016f
	},
	[10] = { // cedilla: C G K L N R S T / c k l n r s t
		[2] = 0x00c7, [6] = 0x0122, [10] = 0x0136, [11] = 0x013b, [13] = 0x0145,
		[17] = 0x0156, [18] = 0x015e, [19] = 0x0162,
		[28] = 0x00e7, [36] = 0x0137, [37] = 0x013c, [39] = 0x0146,
		[43] = 0x0157, [44] = 0x015f, [45] = 0x0163
	},
	[11] = { 0 },
	[12] = { // double acute: O U / o u
		[14] = 0x0150, [20] = 0x0170,
		[40] = 0x0151, [46] = 0x0171
	},
	[13] = { // ogonek: A E I U / a e i u
		[0] = 0x0104, [4] = 0x0118, [8] = 0x012e, [20] = 0x0172,
		[26] = 0x0105, [30] = 0x0119, [34] = 0x012f, [46] = 0x0173
	},
	[14] = { // caron: C D E L N R S T Z / c d e l n r s t z
		[2] = 0x010c, [3] = 0x010e, [4] = 0x011a, [11] = 0x013d, [13] = 0x0147,
		[17] = 0x0158, [18] = 0x0160, [19] = 0x0164, [25] = 0x017d,
		[28] = 0x010d, [29] = 0x010f, [30] = 0x011b, [37] = 0x013e, [39] = 0x0148,
		[43] = 0x0159, [44] = 0x0161, [45] = 0x0165, [51] = 0x017e
	}
};
444
void page_buffer_add_string (struct TeletextCtx *ctx, const char *s)
445
{
446
	if(ctx->page_buffer_cur_size < (ctx->page_buffer_cur_used + strlen (s)+1))
447
	{
448
		int add=strlen (s)+4096; // So we don't need to realloc often
449
		ctx->page_buffer_cur_size=ctx->page_buffer_cur_size+add;
450
		ctx->page_buffer_cur=(char *) realloc (ctx->page_buffer_cur,ctx->page_buffer_cur_size);
451
		if (!ctx->page_buffer_cur)
452
			fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory to process teletext page.\n");
453
	}
454
	memcpy (ctx->page_buffer_cur+ctx->page_buffer_cur_used, s, strlen (s));
455
	ctx->page_buffer_cur_used+=strlen (s);
456
	ctx->page_buffer_cur[ctx->page_buffer_cur_used]=0;
457
}
458

    
459
void ucs2_buffer_add_char (struct TeletextCtx *ctx, uint64_t c)
460
{
461
	if (ctx->ucs2_buffer_cur_size<(ctx->ucs2_buffer_cur_used+2))
462
	{
463
		int add=4096; // So we don't need to realloc often
464
		ctx->ucs2_buffer_cur_size=ctx->ucs2_buffer_cur_size+add;
465
		ctx->ucs2_buffer_cur=(uint64_t *) realloc (ctx->ucs2_buffer_cur,ctx->ucs2_buffer_cur_size*sizeof (uint64_t));
466
		if (!ctx->ucs2_buffer_cur)
467
			fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory to process teletext page.\n");
468
	}
469
	ctx->ucs2_buffer_cur[ctx->ucs2_buffer_cur_used++]=c;
470
	ctx->ucs2_buffer_cur[ctx->ucs2_buffer_cur_used]=0;
471
}
472

    
473
/*
 * Appends the single character c to the current page output buffer by wrapping
 * it in a one-character string and delegating to page_buffer_add_string().
 */
void page_buffer_add_char (struct TeletextCtx *ctx, char c)
{
	char one_char[2] = { c, 0 };

	page_buffer_add_string (ctx, one_char);
}
480

    
481
// ETS 300 706, chapter 8.2
482
uint8_t unham_8_4(uint8_t a)
483
{
484
	uint8_t r = UNHAM_8_4[a];
485
	if (r == 0xff) {
486
		dbg_print (CCX_DMT_TELETEXT, "- Unrecoverable data error; UNHAM8/4(%02x)\n", a);
487
	}
488
	return (r & 0x0f);
489
}
490

    
491
/*
 * Hamming 24/18 decoding (ETS 300 706, chapter 8.3).
 *
 * a  received triplet in the low 24 bits
 *
 * Returns the 18 data bits gathered from bits 2, 4-6, 8-14 and 16-22 of the
 * (possibly corrected) triplet, or 0xffffffff when a double error is detected.
 */
uint32_t unham_24_18(uint32_t a)
{
	// Tests A-F end up in bits 0-5 of 'checks': every set input bit i (0..22)
	// contributes i + 33, which always toggles bit 5 (test F) as well.
	uint8_t checks = 0;

	for (uint8_t bit = 0; bit < 23; bit++)
	{
		if ((a >> bit) & 0x01)
			checks ^= (uint8_t) (bit + 33);
	}
	// Bit 23 takes part in the overall parity test (F) only.
	if ((a >> 23) & 0x01)
		checks ^= 32;

	if ((checks & 0x1f) != 0x1f)
	{
		// At least one of the tests A-E failed ...
		if (checks & 0x20)
		{
			// ... while F passed: double error, not correctable.
			return 0xffffffff;
		}
		// ... and F failed too: single error, flip the offending bit.
		// Bit 5 is clear here, so checks <= 30 and the shift count stays in 0..30.
		a ^= 1 << (30 - checks);
	}

	const uint32_t from_bit_2      = (a & 0x000004) >> 2;
	const uint32_t from_bits_4_6   = (a & 0x000070) >> 3;
	const uint32_t from_bits_8_14  = (a & 0x007f00) >> 4;
	const uint32_t from_bits_16_22 = (a & 0x7f0000) >> 5;

	return from_bit_2 | from_bits_4_6 | from_bits_8_14 | from_bits_16_22;
}
515

    
516
//Default G0 Character Set
517
void set_g0_charset(uint32_t triplet)
518
{
519
	// ETS 300 706, Table 32
520
	if((triplet & 0x3c00) == 0x1000)
521
	{
522
		if((triplet & 0x0380) == 0x0000)
523
			default_g0_charset = CYRILLIC1;
524
		else if((triplet & 0x0380) == 0x0200)
525
			default_g0_charset = CYRILLIC2;
526
		else if((triplet & 0x0380) == 0x0280)
527
			default_g0_charset = CYRILLIC3;
528
		else
529
			default_g0_charset = LATIN;
530
	}
531
	else
532
		default_g0_charset = LATIN;
533
}
534

    
535
// Latin National Subset Selection
536
void remap_g0_charset(uint8_t c)
537
{
538
	if (c != primary_charset.current)
539
	{
540
		uint8_t m = G0_LATIN_NATIONAL_SUBSETS_MAP[c];
541
		if (m == 0xff)
542
		{
543
			fprintf(stderr, "- G0 Latin National Subset ID 0x%1x.%1x is not implemented\n", (c >> 3), (c & 0x7));
544
		}
545
		else
546
		{
547
			for (uint8_t j = 0; j < 13; j++)
548
				G0[LATIN][G0_LATIN_NATIONAL_SUBSETS_POSITIONS[j]] = G0_LATIN_NATIONAL_SUBSETS[m].characters[j];
549
			VERBOSE_ONLY fprintf(stderr, "- Using G0 Latin National Subset ID 0x%1x.%1x (%s)\n", (c >> 3), (c & 0x7), G0_LATIN_NATIONAL_SUBSETS[m].language);
550
			primary_charset.current = c;
551
		}
552
	}
553
}
554

    
555

    
556
/*
 * Converts one 16-bit wide character to UTF-8.
 *
 * r   output buffer of at least 4 bytes; all four bytes are always written
 * ch  UCS-2 code unit
 *
 * The encoding takes 1, 2 or 3 bytes; the remaining bytes of r[0..3] are set
 * to 0, so r is always a NUL-terminated string.
 */
void ucs2_to_utf8(char *r, uint16_t ch)
{
	// Start from an all-zero tail; the branches below fill in what they need.
	r[1] = 0;
	r[2] = 0;
	r[3] = 0;

	if (ch < 0x80)
	{
		// 0xxxxxxx
		r[0] = ch & 0x7f;
		return;
	}

	if (ch < 0x800)
	{
		// 110xxxxx 10xxxxxx
		r[0] = (ch >> 6) | 0xc0;
		r[1] = (ch & 0x3f) | 0x80;
		return;
	}

	// 1110xxxx 10xxxxxx 10xxxxxx
	r[0] = (ch >> 12) | 0xe0;
	r[1] = ((ch >> 6) & 0x3f) | 0x80;
	r[2] = (ch & 0x3f) | 0x80;
}
579

    
580
// check parity and translate any reasonable teletext character into ucs2
581
uint16_t telx_to_ucs2(uint8_t c)
582
{
583
	if (PARITY_8[c] == 0)
584
	{
585
		dbg_print (CCX_DMT_TELETEXT,  "- Unrecoverable data error; PARITY(%02x)\n", c);
586
		return 0x20;
587
	}
588

    
589
	uint16_t r = c & 0x7f;
590
	if (r >= 0x20)
591
		r = G0[default_g0_charset][r - 0x20];
592
	return r;
593
}
594

    
595
/*
 * Converts a three-digit BCD page number (one digit per nibble) to its value,
 * e.g. 0x888 -> 888. Bits above the third nibble are ignored; nibbles are not
 * checked for being valid decimal digits.
 */
uint16_t bcd_page_to_int (uint16_t bcd)
{
	const uint16_t hundreds = (bcd >> 8) & 0xf;
	const uint16_t tens     = (bcd >> 4) & 0xf;
	const uint16_t units    = bcd & 0xf;

	return hundreds * 100 + tens * 10 + units;
}
599

    
600
void telx_case_fix (struct TeletextCtx *context)
601
{
602
	//Capitalizing first letter of every sentence
603
	int line_len = strlen(context->page_buffer_cur);
604
	for(int i = 0; i < line_len; i++)
605
	{
606
		switch(context->page_buffer_cur[i])
607
		{
608
			case ' ':
609
			//case 0x89: // This is a transparent space
610
			case '-':
611
				break;
612
			case '.': // Fallthrough
613
			case '?': // Fallthrough
614
			case '!':
615
			case ':':
616
				context->new_sentence = 1;
617
				break;
618
			default:
619
				if (context->new_sentence)
620
					context->page_buffer_cur[i] = cctoupper(context->page_buffer_cur[i]);
621
				else
622
					context->page_buffer_cur[i] = cctolower(context->page_buffer_cur[i]);
623
				context->new_sentence = 0;
624
				break;
625
		}
626
	}
627
	telx_correct_case(context->page_buffer_cur);
628
}
629

    
630
void telxcc_dump_prev_page (struct TeletextCtx *ctx, struct cc_subtitle *sub)
631
{
632
	char info[4];
633
	if (!ctx->page_buffer_prev)
634
		return;
635

    
636
	snprintf(info, 4, "%.3u", bcd_page_to_int(tlt_config.page));
637
	add_cc_sub_text(sub, ctx->page_buffer_prev, ctx->prev_show_timestamp,
638
		ctx->prev_hide_timestamp, info, "TLT", CCX_ENC_UTF_8);
639

    
640
	if (ctx->page_buffer_prev)
641
		free (ctx->page_buffer_prev);
642
	if (ctx->ucs2_buffer_prev)
643
		free (ctx->ucs2_buffer_prev);
644
	// Switch "dump" buffers
645
	ctx->page_buffer_prev_used=ctx->page_buffer_cur_used;
646
	ctx->page_buffer_prev_size=ctx->page_buffer_cur_size;
647
	ctx->page_buffer_prev=ctx->page_buffer_cur;
648
	ctx->page_buffer_cur_size=0;
649
	ctx->page_buffer_cur_used=0;
650
	ctx->page_buffer_cur=NULL;
651
	// Also switch compare buffers
652
	ctx->ucs2_buffer_prev_used=ctx->ucs2_buffer_cur_used;
653
	ctx->ucs2_buffer_prev_size=ctx->ucs2_buffer_cur_size;
654
	ctx->ucs2_buffer_prev=ctx->ucs2_buffer_cur;
655
	ctx->ucs2_buffer_cur_size=0;
656
	ctx->ucs2_buffer_cur_used=0;
657
	ctx->ucs2_buffer_cur=NULL;
658
}
659

    
660
// Note: c1 and c2 are just used for debug output, not for the actual comparison
661
int fuzzy_memcmp (const char *c1, const char *c2, const uint64_t *ucs2_buf1, unsigned ucs2_buf1_len,
662
				  const uint64_t *ucs2_buf2, unsigned ucs2_buf2_len)
663
{
664
	size_t l;
665
	size_t short_len=ucs2_buf1_len<ucs2_buf2_len?ucs2_buf1_len:ucs2_buf2_len;
666
	size_t max=(short_len * tlt_config.levdistmaxpct)/100;
667
	unsigned upto=(ucs2_buf1_len<ucs2_buf2_len)?ucs2_buf1_len:ucs2_buf2_len;
668
	if (max < tlt_config.levdistmincnt)
669
		max=tlt_config.levdistmincnt;
670

    
671
	// For the second string, only take the first chars (up to the first string length, that's upto).
672
	l = (size_t) levenshtein_dist (ucs2_buf1,ucs2_buf2,ucs2_buf1_len,upto);
673
	int res=(l>max);
674
	dbg_print(CCX_DMT_LEVENSHTEIN, "\rLEV | %s | %s | Max: %d | Calc: %d | Match: %d\n", c1,c2,max,l,!res);
675
	return res;
676
}
677

    
678
/**
 * Renders one received teletext page into ctx's current page buffer and
 * passes the result on according to tlt_config.write_format.
 *
 * The page is ignored when it lies outside the configured extraction window,
 * when its hide_timestamp is 0, or when no row contains a Start Box (0x0b)
 * code. For every row with boxed text the characters are converted to UTF-8
 * and appended to the page buffer (with <font> tags and HTML entities unless
 * disabled in tlt_config); each character is also appended to the
 * fixed-width comparison buffer used by fuzzy_memcmp().
 *
 * For CCX_OF_TRANSCRIPT / CCX_OF_SRT the page is then either kept as the new
 * "previous" page (no previous page yet, or fuzzy match with it) or the
 * previous page is written out via telxcc_dump_prev_page(). For every other
 * format the text is handed to add_cc_sub_text() directly.
 *
 * @param ctx   teletext decoder context owning the page/compare buffers
 * @param page  page to render; hide_timestamp may be raised to show_timestamp
 * @param sub   subtitle sink passed through to the output helpers
 */
void process_page(struct TeletextCtx *ctx, teletext_page_t *page, struct cc_subtitle *sub)
{
	if ((tlt_config.extraction_start.set && page->hide_timestamp < tlt_config.extraction_start.time_in_ms) ||
		(tlt_config.extraction_end.set && page->show_timestamp > tlt_config.extraction_end.time_in_ms) ||
		page->hide_timestamp == 0)
	{
		return;
	}
#ifdef DEBUG
	for (uint8_t row = 1; row < 25; row++) {
		fprintf(stdout, "DEBUG[%02u]: ", row);
		for (uint8_t col = 0; col < 40; col++) fprintf(stdout, "%3x ", page->text[row][col]);
		fprintf(stdout, "\n");
	}
	fprintf(stdout, "\n");
#endif
	char u[4] = {0, 0, 0, 0};

	// optimization: slicing column by column -- higher probability we could find boxed area start mark sooner
	uint8_t page_is_empty = YES;
	for (uint8_t col = 0; col < 40; col++)
	{
		for (uint8_t row = 1; row < 25; row++)
		{
			if (page->text[row][col] == 0x0b)
			{
				page_is_empty = NO;
				goto scan_done;
			}
		}
	}
	scan_done:
	if (page_is_empty == YES) return;

	if (page->show_timestamp > page->hide_timestamp)
		page->hide_timestamp = page->show_timestamp;

	char timecode_show[24] = { 0 }, timecode_hide[24] = { 0 };

	int time_reported=0;
	char c_tempb[256]; // For buffering
	uint8_t line_count = 0;

	timestamp_to_srttime(page->show_timestamp, timecode_show);
	timecode_show[12] = 0;
	timestamp_to_srttime(page->hide_timestamp, timecode_hide);
	timecode_hide[12] = 0;

	// process data
	for (uint8_t row = 1; row < 25; row++)
	{
		// anchors for string trimming purpose
		uint8_t col_start = 40;
		uint8_t col_stop = 40;

		// find the last Start Box code of the row
		for (int8_t col = 39; col >= 0; col--)
		{
			if (page->text[row][col] == 0xb)
			{
				col_start = col;
				line_count++;
				break;
			}
		}
		// line is empty
		if (col_start > 39)
			continue;

		// trim: col_start becomes the first printable column, col_stop the last one
		// before an End Box (0x0a) code
		for (uint8_t col = col_start + 1; col <= 39; col++)
		{
			if (page->text[row][col] > 0x20)
			{
				if (col_stop > 39)
					col_start = col;
				col_stop = col;
			}
			if (page->text[row][col] == 0xa)
				break;
		}
		// line is empty
		if (col_stop > 39)
			continue;

		// ETS 300 706, chapter 12.2: Alpha White ("Set-After") - Start-of-row default condition.
		// used for colour changes _before_ start box mark
		// white is default as stated in ETS 300 706, chapter 12.2
		// black(0), red(1), green(2), yellow(3), blue(4), magenta(5), cyan(6), white(7)
		uint8_t foreground_color = 0x7;
		uint8_t font_tag_opened = NO;

		// separator between this line and the previous one
		if (line_count > 1)
		{
			switch (tlt_config.write_format)
			{
				case CCX_OF_TRANSCRIPT:
					page_buffer_add_string(ctx, " ");
					break;
				case CCX_OF_SMPTETT:
					page_buffer_add_string(ctx, "<br/>");
					break;
				default:
					page_buffer_add_string(ctx, "\r\n");
					break;
			}
		}

		if (tlt_config.gui_mode_reports)
		{
			fprintf (stderr, "###SUBTITLE#");
			if (!time_reported)
			{
				char timecode_show_mmss[6], timecode_hide_mmss[6];
				memcpy (timecode_show_mmss, timecode_show+3, 5);
				memcpy (timecode_hide_mmss, timecode_hide+3, 5);
				timecode_show_mmss[5]=0;
				timecode_hide_mmss[5]=0;
				// Note, only MM:SS here as we need to save space in the preview window
				fprintf (stderr, "%s#%s#",timecode_show_mmss, timecode_hide_mmss);
				time_reported=1;
			}
			else
				fprintf (stderr, "##");
		}

		for (uint8_t col = 0; col <= col_stop; col++)
		{
			// v is just a shortcut
			uint16_t v = page->text[row][col];

			// colour codes in front of the text only update the pending foreground colour
			if (col < col_start)
			{
				if (v <= 0x7) foreground_color = (uint8_t) v;
			}

			if (col == col_start)
			{
				if ((foreground_color != 0x7) && !tlt_config.nofontcolor)
				{
					sprintf (c_tempb, "<font color=\"%s\">", TTXT_COLOURS[foreground_color]);
					page_buffer_add_string (ctx, c_tempb);
					font_tag_opened = YES;
				}
			}

			if (col >= col_start)
			{
				if (v <= 0x7)
				{
					// ETS 300 706, chapter 12.2: Unless operating in "Hold Mosaics" mode,
					// each character space occupied by a spacing attribute is displayed as a SPACE.
					if (!tlt_config.nofontcolor)
					{
						if (font_tag_opened == YES)
						{
							page_buffer_add_string (ctx, "</font>");
							font_tag_opened = NO;
						}

						// black is considered as white for telxcc purpose
						// telxcc writes <font/> tags only when needed
						if ((v > 0x0) && (v < 0x7))
						{
							sprintf (c_tempb, "<font color=\"%s\">", TTXT_COLOURS[v]);
							page_buffer_add_string (ctx, c_tempb);
							font_tag_opened = YES;
						}
					}
					else
						v = 0x20;
				}

				if (v >= 0x20)
				{
					ucs2_to_utf8(u, v);
					// Pack the four bytes of u into one comparison key. Each byte goes through
					// unsigned char first: with a signed plain char, bytes >= 0x80 would be
					// negative, making the shifts undefined and letting sign extension
					// overwrite the higher bytes of the key.
					uint64_t ucs2_char = ((uint64_t) (unsigned char) u[0] << 24) |
						((uint64_t) (unsigned char) u[1] << 16) |
						((uint64_t) (unsigned char) u[2] << 8) |
						(uint64_t) (unsigned char) u[3];
					ucs2_buffer_add_char(ctx, ucs2_char);

					// translate some chars into entities, if in colour mode
					if (!tlt_config.nofontcolor && !tlt_config.nohtmlescape)
					{
						for (uint8_t i = 0; i < array_length(ENTITIES); i++)
							if (v == ENTITIES[i].character)
							{
								page_buffer_add_string (ctx, ENTITIES[i].entity);
								// v < 0x20 won't be printed in next block
								v = 0;
								break;
							}
					}
				}
				if (v >= 0x20)
				{
					page_buffer_add_string (ctx, u);
					if (tlt_config.gui_mode_reports) // For now we just handle the easy stuff
						fprintf (stderr,"%s",u);
				}
			}
		}

		// no tag will left opened!
		if ((!tlt_config.nofontcolor) && (font_tag_opened == YES))
		{
			page_buffer_add_string (ctx, "</font>");
			font_tag_opened = NO;
		}

		if (tlt_config.gui_mode_reports)
		{
			fprintf (stderr,"\n");
		}
	}
	time_reported=0;

	switch (tlt_config.write_format)
	{
		case CCX_OF_TRANSCRIPT:
		case CCX_OF_SRT:
			if (ctx->page_buffer_prev_used == 0)
				ctx->prev_show_timestamp = page->show_timestamp;
			if (ctx->page_buffer_prev_used == 0 ||
					fuzzy_memcmp (ctx->page_buffer_prev, ctx->page_buffer_cur,
						ctx->ucs2_buffer_prev, ctx->ucs2_buffer_prev_used,
						ctx->ucs2_buffer_cur, ctx->ucs2_buffer_cur_used
						) == 0)
			{
				// If empty previous buffer, we just start one with the
				// current page and do nothing. Wait until we see more.
				// The current buffers become the previous ones (ownership moves,
				// the old previous buffers are released).
				if (ctx->page_buffer_prev)
					free (ctx->page_buffer_prev);

				ctx->page_buffer_prev_used	= ctx->page_buffer_cur_used;
				ctx->page_buffer_prev_size	= ctx->page_buffer_cur_size;
				ctx->page_buffer_prev	= ctx->page_buffer_cur;
				ctx->page_buffer_cur_size	= 0;
				ctx->page_buffer_cur_used	= 0;
				ctx->page_buffer_cur		= NULL;

				if (ctx->ucs2_buffer_prev)
					free (ctx->ucs2_buffer_prev);
				ctx->ucs2_buffer_prev_used	= ctx->ucs2_buffer_cur_used;
				ctx->ucs2_buffer_prev_size	= ctx->ucs2_buffer_cur_size;
				ctx->ucs2_buffer_prev	= ctx->ucs2_buffer_cur;
				ctx->ucs2_buffer_cur_size	= 0;
				ctx->ucs2_buffer_cur_used	= 0;
				ctx->ucs2_buffer_cur		= NULL;
				ctx->prev_hide_timestamp	= page->hide_timestamp;
				break;
			}
			else
			{
				// OK, the old and new buffer don't match. So write the old
				telxcc_dump_prev_page(ctx, sub);
				ctx->prev_hide_timestamp = page->hide_timestamp;
				ctx->prev_show_timestamp = page->show_timestamp;
			}
			break;
		default:
			if (ctx->sentence_cap)
				telx_case_fix(ctx);
			add_cc_sub_text(sub, ctx->page_buffer_cur, page->show_timestamp,
				page->hide_timestamp + 1, NULL, "TLT", CCX_ENC_UTF_8);
			break;
	}

	// Also update GUI...

	// reset the current page buffer for the next page
	ctx->page_buffer_cur_used=0;
	if (ctx->page_buffer_cur)
		ctx->page_buffer_cur[0]=0;
	if (tlt_config.gui_mode_reports)
		fflush (stderr);
}
948

    
949
/**
 * Processes a single teletext packet and updates the decoder state in ctx.
 *
 * The magazine number m and packet number y are decoded from the Hamming 8/4
 * protected address bytes; the packet is then handled according to y:
 *   - y == 0:        page header. Records subtitle pages, auto-selects
 *                    tlt_config.page if none is set, flushes a pending
 *                    (tainted) page through process_page() and starts
 *                    receiving the new page.
 *   - y == 1..23:    display row; copied into the page buffer where no
 *                    character has been stored yet.
 *   - y == 26:       X/26 enhancement triplets (G2 characters, characters
 *                    with diacritical marks).
 *   - y == 28 / 29:  X/28 and M/29 character set designation.
 *   - m == 8, y == 30: broadcast service data (programme identification and
 *                    UTC), handled once per context.
 * Packets other than the page header are only used when they belong to the
 * magazine of tlt_config.page.
 *
 * @param ctx           teletext decoder context
 * @param data_unit_id  data unit type the packet was carried in
 * @param packet        packet payload (address bytes followed by data bytes)
 * @param timestamp     timestamp associated with the packet
 * @param sub           subtitle sink passed on to process_page()
 */
void process_telx_packet(struct TeletextCtx *ctx, data_unit_t data_unit_id, teletext_packet_payload_t *packet, uint64_t timestamp, struct cc_subtitle *sub)
{
	// variable names conform to ETS 300 706, chapter 7.1.2
	uint8_t address, m, y, designation_code;
	address = (unham_8_4(packet->address[1]) << 4) | unham_8_4(packet->address[0]);
	m = address & 0x7;
	if (m == 0) m = 8;
	y = (address >> 3) & 0x1f;
	designation_code = (y > 25) ? unham_8_4(packet->data[0]) : 0x00;

	if (y == 0)
	{

		// CC map
		uint8_t i = (unham_8_4(packet->data[1]) << 4) | unham_8_4(packet->data[0]);
		uint8_t flag_subtitle = (unham_8_4(packet->data[5]) & 0x08) >> 3;
		uint16_t page_number;
		uint8_t charset;
		uint8_t c;
		ctx->cc_map[i] |= flag_subtitle << (m - 1);

		if ((flag_subtitle == YES) && (i < 0xff))
		{
			int thisp= (m << 8) | (unham_8_4(packet->data[1]) << 4) | unham_8_4(packet->data[0]);
			char t1[10];
			// The page number is re-read as decimal from its hexadecimal spelling
			sprintf (t1,"%x",thisp); // Example: 1928 -> 788
			thisp=atoi (t1);
			if (!ctx->seen_sub_page[thisp])
			{
				ctx->seen_sub_page[thisp]=1;
				mprint ("\rNotice: Teletext page with possible subtitles detected: %03d\n",thisp);
			}
		}
		if ((tlt_config.page == 0) && (flag_subtitle == YES) && (i < 0xff))
		{
			tlt_config.page = (m << 8) | (unham_8_4(packet->data[1]) << 4) | unham_8_4(packet->data[0]);
			mprint ("- No teletext page specified, first received suitable page is %03x, not guaranteed\n", tlt_config.page);
		}

		// Page number and control bits
		page_number = (m << 8) | (unham_8_4(packet->data[1]) << 4) | unham_8_4(packet->data[0]);
		charset = ((unham_8_4(packet->data[7]) & 0x08) | (unham_8_4(packet->data[7]) & 0x04) | (unham_8_4(packet->data[7]) & 0x02)) >> 1;
		//uint8_t flag_suppress_header = unham_8_4(packet->data[6]) & 0x01;
		//uint8_t flag_inhibit_display = (unham_8_4(packet->data[6]) & 0x08) >> 3;

		// ETS 300 706, chapter 9.3.1.3:
		// When set to '1' the service is designated to be in Serial mode and the transmission of a page is terminated
		// by the next page header with a different page number.
		// When set to '0' the service is designated to be in Parallel mode and the transmission of a page is terminated
		// by the next page header with a different page number but the same magazine number.
		// The same setting shall be used for all page headers in the service.
		// ETS 300 706, chapter 7.2.1: Page is terminated by and excludes the next page header packet
		// having the same magazine address in parallel transmission mode, or any magazine address in serial transmission mode.
		ctx->transmission_mode = (transmission_mode_t) (unham_8_4(packet->data[7]) & 0x01);

		// FIXME: Well, this is not ETS 300 706 kosher, however we are interested in DATA_UNIT_EBU_TELETEXT_SUBTITLE only
		if ((ctx->transmission_mode == TRANSMISSION_MODE_PARALLEL) && (data_unit_id != DATA_UNIT_EBU_TELETEXT_SUBTITLE)) return;

		if ((ctx->receiving_data == YES) && (
			((ctx->transmission_mode == TRANSMISSION_MODE_SERIAL) && (PAGE(page_number) != PAGE(tlt_config.page))) ||
			((ctx->transmission_mode == TRANSMISSION_MODE_PARALLEL) && (PAGE(page_number) != PAGE(tlt_config.page)) && (m == MAGAZINE(tlt_config.page)))))
		{
			ctx->receiving_data = NO;
			return;
		}

		// Page transmission is terminated, however now we are waiting for our new page
		if (page_number != tlt_config.page)
			return;


		// Now we have the beginning of page transmission; if there is page_buffer pending, process it
		if (ctx->page_buffer.tainted == YES)
		{
			// Convert telx to UCS-2 before processing
			for(uint8_t yt = 1; yt <= 23; ++yt)
			{
				for(uint8_t it = 0; it < 40; it++)
				{
					if (ctx->page_buffer.text[yt][it] != 0x00 && ctx->page_buffer.g2_char_present[yt][it] == 0)
						ctx->page_buffer.text[yt][it] = telx_to_ucs2(ctx->page_buffer.text[yt][it]);
				}
			}
			// it would be nice, if subtitle hides on previous video frame, so we contract 40 ms (1 frame @25 fps)
			ctx->page_buffer.hide_timestamp = timestamp - 40;
			// unsigned wrap-around (timestamp < 40): fall back to 0
			if (ctx->page_buffer.hide_timestamp > timestamp)
			{
				ctx->page_buffer.hide_timestamp = 0;
			}
			process_page(ctx, &ctx->page_buffer, sub);
		}

		ctx->page_buffer.show_timestamp = timestamp;
		ctx->page_buffer.hide_timestamp = 0;
		memset(ctx->page_buffer.text, 0x00, sizeof(ctx->page_buffer.text));
		memset(ctx->page_buffer.g2_char_present, 0x00, sizeof(ctx->page_buffer.g2_char_present));
		ctx->page_buffer.tainted = NO;
		ctx->receiving_data = YES;
		if(default_g0_charset == LATIN) // G0 Character National Option Sub-sets selection required only for Latin Character Sets
		{
			primary_charset.g0_x28 = UNDEFINED;
			c = (primary_charset.g0_m29 != UNDEFINED) ? primary_charset.g0_m29 : charset;
			remap_g0_charset(c);
		}
		/*
		// I know -- not needed; in subtitles we will never need disturbing teletext page status bar
		// displaying tv station name, current time etc.
		if (flag_suppress_header == NO) {
			for (uint8_t i = 14; i < 40; i++) page_buffer.text[y][i] = telx_to_ucs2(packet->data[i]);
			//page_buffer.tainted = YES;
		}
		*/
	}
	else if ((m == MAGAZINE(tlt_config.page)) && (y >= 1) && (y <= 23) && (ctx->receiving_data == YES))
	{
		// ETS 300 706, chapter 9.4.1: Packets X/26 at presentation Levels 1.5, 2.5, 3.5 are used for addressing
		// a character location and overwriting the existing character defined on the Level 1 page
		// ETS 300 706, annex B.2.2: Packets with Y = 26 shall be transmitted before any packets with Y = 1 to Y = 25;
		// so page_buffer.text[y][i] may already contain any character received
		// in frame number 26, skip original G0 character
		for (uint8_t i = 0; i < 40; i++)
		{
			if (ctx->page_buffer.text[y][i] == 0x00)
				ctx->page_buffer.text[y][i] = packet->data[i];
		}
		ctx->page_buffer.tainted = YES;
	}
	else if ((m == MAGAZINE(tlt_config.page)) && (y == 26) && (ctx->receiving_data == YES))
	{
		// ETS 300 706, chapter 12.3.2: X/26 definition
		uint8_t x26_row = 0;
		uint8_t x26_col = 0;

		// 13 Hamming 24/18 protected triplets start at data[1]
		uint32_t triplets[13] = { 0 };
		for (uint8_t i = 1, j = 0; i < 40; i += 3, j++) triplets[j] = unham_24_18((packet->data[i + 2] << 16) | (packet->data[i + 1] << 8) | packet->data[i]);

		for (uint8_t j = 0; j < 13; j++)
		{
			uint8_t data;
			uint8_t mode;
			uint8_t triplet_address; // named apart from the packet address decoded above
			uint8_t row_address_group;
			// invalid data (HAM24/18 uncorrectable error detected), skip group
			if (triplets[j] == 0xffffffff)
			{
				dbg_print (CCX_DMT_TELETEXT, "- Unrecoverable data error; UNHAM24/18()=%04x\n", triplets[j]);
				continue;
			}

			data = (triplets[j] & 0x3f800) >> 11;
			mode = (triplets[j] & 0x7c0) >> 6;
			triplet_address = triplets[j] & 0x3f;
			row_address_group = (triplet_address >= 40) && (triplet_address <= 63);

			// ETS 300 706, chapter 12.3.1, table 27: set active position
			if ((mode == 0x04) && (row_address_group == YES))
			{
				x26_row = triplet_address - 40;
				if (x26_row == 0) x26_row = 24;
				x26_col = 0;
			}

			// ETS 300 706, chapter 12.3.1, table 27: termination marker
			if ((mode >= 0x11) && (mode <= 0x1f) && (row_address_group == YES)) break;

			// ETS 300 706, chapter 12.3.1, table 27: character from G2 set
			if ((mode == 0x0f) && (row_address_group == NO))
			{
				x26_col = triplet_address;
				if (data > 31)
				{
					ctx->page_buffer.text[x26_row][x26_col] = G2[0][data - 0x20];
					ctx->page_buffer.g2_char_present[x26_row][x26_col] = 1;
				}
			}

			// ETS 300 706 v1.2.1, chapter 12.3.4, Table 29: G0 character without diacritical mark (display '@' instead of '*')
			if ((mode == 0x10) && (row_address_group == NO))
			{
				x26_col = triplet_address;
				if (data == 64) // check for @ symbol
				{
					remap_g0_charset(0);
					ctx->page_buffer.text[x26_row][x26_col] = 0x40;
				}

			}

			// ETS 300 706, chapter 12.3.1, table 27: G0 character with diacritical mark
			if ((mode >= 0x11) && (mode <= 0x1f) && (row_address_group == NO))
			{
				x26_col = triplet_address;

				// A - Z
				if ((data >= 65) && (data <= 90))
					ctx->page_buffer.text[x26_row][x26_col] = G2_ACCENTS[mode - 0x11][data - 65];
				// a - z
				else if ((data >= 97) && (data <= 122))
					ctx->page_buffer.text[x26_row][x26_col] = G2_ACCENTS[mode - 0x11][data - 71];
				// other
				else
					ctx->page_buffer.text[x26_row][x26_col] = telx_to_ucs2(data);

				ctx->page_buffer.g2_char_present[x26_row][x26_col] = 1;
			}
		}
	}
	else if ((m == MAGAZINE(tlt_config.page)) && (y == 28) && (ctx->receiving_data == YES))
	{
		// TODO:
		//   ETS 300 706, chapter 9.4.7: Packet X/28/4
		//   Where packets 28/0 and 28/4 are both transmitted as part of a page, packet 28/0 takes precedence over 28/4 for all but the colour map entry coding.
		if ((designation_code == 0) || (designation_code == 4))
		{
			// ETS 300 706, chapter 9.4.2: Packet X/28/0 Format 1
			// ETS 300 706, chapter 9.4.7: Packet X/28/4
			uint32_t triplet0 = unham_24_18((packet->data[3] << 16) | (packet->data[2] << 8) | packet->data[1]);

			if (triplet0 == 0xffffffff)
			{
				// invalid data (HAM24/18 uncorrectable error detected), skip group
				dbg_print (CCX_DMT_TELETEXT, "! Unrecoverable data error; UNHAM24/18()=%04x\n", triplet0);
			}
			else
			{
				// ETS 300 706, chapter 9.4.2: Packet X/28/0 Format 1 only
				if ((triplet0 & 0x0f) == 0x00)
				{
					// ETS 300 706, Table 32
					set_g0_charset(triplet0); // Deciding G0 Character Set
					if(default_g0_charset == LATIN)
					{
						primary_charset.g0_x28 = (triplet0 & 0x3f80) >> 7;
						remap_g0_charset(primary_charset.g0_x28);
					}
				}
			}
		}
	}
	else if ((m == MAGAZINE(tlt_config.page)) && (y == 29))
	{
		// TODO:
		//   ETS 300 706, chapter 9.5.1 Packet M/29/0
		//   Where M/29/0 and M/29/4 are transmitted for the same magazine, M/29/0 takes precedence over M/29/4.
		if ((designation_code == 0) || (designation_code == 4))
		{
			// ETS 300 706, chapter 9.5.1: Packet M/29/0
			// ETS 300 706, chapter 9.5.3: Packet M/29/4
			uint32_t triplet0 = unham_24_18((packet->data[3] << 16) | (packet->data[2] << 8) | packet->data[1]);

			if (triplet0 == 0xffffffff)
			{
				// invalid data (HAM24/18 uncorrectable error detected), skip group
				dbg_print (CCX_DMT_TELETEXT, "! Unrecoverable data error; UNHAM24/18()=%04x\n", triplet0);
			}
			else
			{
				// ETS 300 706, table 11: Coding of Packet M/29/0
				// ETS 300 706, table 13: Coding of Packet M/29/4
				if ((triplet0 & 0xff) == 0x00)
				{
					set_g0_charset(triplet0);
					if(default_g0_charset == LATIN)
					{
						primary_charset.g0_m29 = (triplet0 & 0x3f80) >> 7;
						// X/28 takes precedence over M/29
						if (primary_charset.g0_x28 == UNDEFINED)
						{
							remap_g0_charset(primary_charset.g0_m29);
						}
					}
				}
			}
		}
	}
	else if ((m == 8) && (y == 30))
	{
		// ETS 300 706, chapter 9.8: Broadcast Service Data Packets
		if (ctx->states.programme_info_processed == NO)
		{
			// ETS 300 706, chapter 9.8.1: Packet 8/30 Format 1
			if (unham_8_4(packet->data[0]) < 2)
			{
				uint32_t t = 0;
				time_t t0;
				mprint ("- Programme Identification Data = ");
				for (uint8_t i = 20; i < 40; i++)
				{
					char u[4] = { 0, 0, 0, 0 };
					// 16 bits wide: the converted character is fed to ucs2_to_utf8() and must
					// not be truncated to its low byte when it lies above 0xFF.
					uint16_t c = telx_to_ucs2(packet->data[i]);
					// strip any control codes from PID, eg. TVP station
					if (c < 0x20) continue;

					ucs2_to_utf8(u, c);
					mprint ( "%s", u);
				}
				mprint ("\n");

				// OMG! ETS 300 706 stores timestamp in 7 bytes in Modified Julian Day in BCD format + HH:MM:SS in BCD format
				// + timezone as 5-bit count of half-hours from GMT with 1-bit sign
				// In addition all decimals are incremented by 1 before transmission.
				// 1st step: BCD to Modified Julian Day
				t += (packet->data[10] & 0x0f) * 10000;
				t += ((packet->data[11] & 0xf0) >> 4) * 1000;
				t += (packet->data[11] & 0x0f) * 100;
				t += ((packet->data[12] & 0xf0) >> 4) * 10;
				t += (packet->data[12] & 0x0f);
				t -= 11111;
				// 2nd step: conversion Modified Julian Day to unix timestamp
				t = (t - 40587) * 86400;
				// 3rd step: add time
				t += 3600 * ( ((packet->data[13] & 0xf0) >> 4) * 10 + (packet->data[13] & 0x0f) );
				t +=   60 * ( ((packet->data[14] & 0xf0) >> 4) * 10 + (packet->data[14] & 0x0f) );
				t +=        ( ((packet->data[15] & 0xf0) >> 4) * 10 + (packet->data[15] & 0x0f) );
				t -= 40271;
				// 4th step: conversion to time_t
				t0 = (time_t)t;
				// ctime output itself is \n-ended
				mprint ("- Universal Time Co-ordinated = %s", ctime(&t0));

				dbg_print (CCX_DMT_TELETEXT, "- Transmission mode = %s\n", (ctx->transmission_mode == TRANSMISSION_MODE_SERIAL ? "serial" : "parallel"));

				if (tlt_config.write_format == CCX_OF_TRANSCRIPT && tlt_config.date_format==ODF_DATE && !tlt_config.noautotimeref)
				{
					mprint ("- Broadcast Service Data Packet received, resetting UTC referential value to %s", ctime(&t0));
					utc_refvalue = t;
					ctx->states.pts_initialized = NO;
				}

				ctx->states.programme_info_processed = YES;
			}
		}
	}
}
1283

    
1284
void tlt_write_rcwt(struct lib_cc_decode *ctx, uint8_t data_unit_id, uint8_t *packet, uint64_t timestamp,  struct cc_subtitle *sub)
1285
{
1286
	ctx->writedata((unsigned char *) &data_unit_id, sizeof(uint8_t), NULL, sub);
1287
	ctx->writedata((unsigned char *) &timestamp, sizeof(uint64_t), NULL, sub);
1288
	ctx->writedata((unsigned char *) packet, 44, NULL, sub);
1289
}
1290

    
1291
void tlt_read_rcwt(void *codec, unsigned char *buf, struct cc_subtitle *sub)
1292
{
1293
	struct TeletextCtx *ctx = codec;
1294

    
1295
	data_unit_t id = buf[0];
1296
	uint64_t t;
1297
	memcpy(&t, &buf[1], sizeof(uint64_t));
1298
	teletext_packet_payload_t *pl = (teletext_packet_payload_t *)&buf[9];
1299

    
1300
	ctx->last_timestamp = t;
1301

    
1302
	ctx->tlt_packet_counter++;
1303
	process_telx_packet(ctx, id, pl, t, sub);
1304
}
1305

    
1306
int tlt_print_seen_pages(struct lib_cc_decode *dec_ctx)
1307
{
1308
	struct TeletextCtx *ctx = NULL;
1309

    
1310
	if(dec_ctx->codec != CCX_CODEC_TELETEXT)
1311
	{
1312
		errno = EINVAL;
1313
		return -1;
1314
	}
1315

    
1316
	ctx = dec_ctx->private_data;
1317

    
1318
	for (int i = 0; i < MAX_TLT_PAGES; i++)
1319
	{
1320
		if (ctx->seen_sub_page[i] == 0)
1321
			continue;
1322
		printf("%d ", i);
1323
	}
1324
	return CCX_OK;
1325
}
1326
void set_tlt_delta(struct lib_cc_decode *dec_ctx, uint64_t pts)
1327
{
1328
	struct TeletextCtx *ctx = dec_ctx->private_data;
1329
	uint32_t t = (uint32_t)(pts / 90);
1330
	if (ctx->states.pts_initialized == NO)
1331
	{
1332
		if (utc_refvalue == UINT64_MAX)
1333
			ctx->delta = 0 - (uint64_t)t;
1334
		else
1335
			ctx->delta = (uint64_t)(1000 * utc_refvalue - t);
1336
		ctx->t0 = t;
1337

    
1338
		ctx->states.pts_initialized = YES;
1339
		if ((ctx->using_pts == NO) && (ctx->global_timestamp == 0))
1340
		{
1341
			// We are using global PCR, nevertheless we still have not received valid PCR timestamp yet
1342
			ctx->states.pts_initialized = NO;
1343
		}
1344
	}
1345
}
1346
int tlt_process_pes_packet(struct lib_cc_decode *dec_ctx, uint8_t *buffer, uint16_t size, struct cc_subtitle *sub, int sentence_cap)
1347
{
1348
	uint64_t pes_prefix;
1349
	uint8_t pes_stream_id;
1350
	uint16_t pes_packet_length;
1351
	uint8_t optional_pes_header_included = NO;
1352
	uint16_t optional_pes_header_length = 0;
1353
	//extension
1354
	uint8_t pes_scrambling_control;
1355
	uint8_t pes_priority;
1356
	uint8_t data_alignment_indicator;
1357
	uint8_t copyright;
1358
	uint8_t original_or_copy;
1359
	uint8_t pts_dts_flag;
1360
	uint8_t escr_flag;
1361
	uint8_t es_rate;
1362
	uint8_t dsm_flag;
1363
	uint8_t aci_flag;
1364
	uint8_t pes_crc_flag;
1365
	uint8_t pes_ext_flag;
1366
	//extension
1367
	uint32_t t = 0;
1368
	uint16_t i;
1369
	struct TeletextCtx *ctx = dec_ctx->private_data;
1370
	ctx->sentence_cap = sentence_cap;
1371

    
1372
	if(!ctx)
1373
	{
1374
		mprint("Teletext: Context cant be NULL, use telxcc_init\n");
1375
		return CCX_EINVAL;
1376
	}
1377

    
1378
	ctx->tlt_packet_counter++;
1379
	if (size < 6)
1380
		return CCX_OK;
1381

    
1382
	// Packetized Elementary Stream (PES) 32-bit start code
1383
	pes_prefix = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2];
1384
	pes_stream_id = buffer[3];
1385

    
1386
	// check for PES header
1387
	if (pes_prefix != 0x000001)
1388
		return 0;
1389

    
1390
	// stream_id is not "Private Stream 1" (0xbd)
1391
	if (pes_stream_id != 0xbd)
1392
		return 0;
1393

    
1394
	// PES packet length
1395
	// ETSI EN 301 775 V1.2.1 (2003-05) chapter 4.3: (N x 184) - 6 + 6 B header
1396
	pes_packet_length = 6 + ((buffer[4] << 8) | buffer[5]);
1397
	// Can be zero. If the "PES packet length" is set to zero, the PES packet can be of any length.
1398
	// A value of zero for the PES packet length can be used only when the PES packet payload is a video elementary stream.
1399

    
1400
	if (ccx_options.pes_header_to_stdout)
1401
	{
1402
		pes_scrambling_control = (uint8_t)(buffer[6] << 2) >> 6;
1403
		pes_priority = (uint8_t)(buffer[6] << 4) >> 7;
1404
		data_alignment_indicator = (uint8_t)(buffer[6] << 5) >> 7;
1405
		copyright = (uint8_t)(buffer[6] << 6) >> 7;
1406
		original_or_copy = (uint8_t)(buffer[6] << 7) >> 7;
1407
		pts_dts_flag = buffer[7] >> 6;
1408
		escr_flag = (uint8_t)(buffer[7] << 2) >> 7;
1409
		es_rate = (uint8_t)(buffer[7] << 3) >> 7;
1410
		dsm_flag = (uint8_t)(buffer[7] << 4) >> 7;
1411
		aci_flag = (uint8_t)(buffer[7] << 5) >> 7;
1412
		pes_crc_flag = (uint8_t)(buffer[7] << 6) >> 7;
1413
		pes_ext_flag = (uint8_t)(buffer[7] << 7) >> 7;
1414

    
1415
		printf("Packet start code prefix: %04x # ", pes_prefix);
1416
		printf("Stream ID: %04x # ", pes_stream_id);
1417
		printf("Packet length: %d ", pes_packet_length);
1418
		printf("PESSC: 0x%x ", pes_scrambling_control);
1419
		printf("PESP: 0x%x ", pes_priority);
1420
		printf("DAI: 0x%x ", data_alignment_indicator);
1421
		printf("CY: 0x%x\n", copyright);
1422
		printf("OOC: 0x%x ", original_or_copy);
1423
		printf("PTSDTS: 0x%x ", pts_dts_flag);
1424
		printf("ESCR: 0x%x ", escr_flag);
1425
		printf("Rate: 0x%x\n", es_rate);
1426
		printf("DSM: 0x%x ", dsm_flag);
1427
		printf("ACI: 0x%x ", aci_flag);
1428
		printf("PESCRC: 0x%x ", pes_crc_flag);
1429
		printf("EXT: 0x%x\n", pes_ext_flag);
1430
	}
1431

    
1432
	if (pes_packet_length == 6)
1433
	{
1434
		if (ccx_options.pes_header_to_stdout)
1435
			printf("\n");
1436
		else
1437
			return CCX_OK;
1438
	}
1439

    
1440
	// truncate incomplete PES packets
1441
	if (pes_packet_length > size)
1442
		pes_packet_length = size;
1443

    
1444
	// optional PES header marker bits (10.. ....)
1445
	if ((buffer[6] & 0xc0) == 0x80)
1446
	{
1447
		optional_pes_header_included = YES;
1448
		optional_pes_header_length = buffer[8];
1449
	}
1450

    
1451
	// should we use PTS or PCR?
1452
	if (ctx->using_pts == UNDEFINED)
1453
	{
1454
		if ((optional_pes_header_included == YES) && ((buffer[7] & 0x80) > 0))
1455
		{
1456
			ctx->using_pts = YES;
1457
			dbg_print (CCX_DMT_TELETEXT, "- PID 0xbd PTS available\n");
1458
		}
1459
		else
1460
		{
1461
			ctx->using_pts = NO;
1462
			dbg_print (CCX_DMT_TELETEXT, "- PID 0xbd PTS unavailable, using TS PCR\n");
1463
		}
1464
	}
1465

    
1466
	// If there is no PTS available, use global PCR
1467
	if (ctx->using_pts == NO)
1468
	{
1469
		t = ctx->global_timestamp;
1470
	}
1471
	// if (using_pts == NO) t = get_pts();
1472
	else
1473
	{
1474
		// PTS is 33 bits wide, however, timestamp in ms fits into 32 bits nicely (PTS/90)
1475
		// presentation and decoder timestamps use the 90 KHz clock, hence PTS/90 = [ms]
1476
		uint64_t pts = 0;
1477
		// __MUST__ assign value to uint64_t and __THEN__ rotate left by 29 bits
1478
		// << is defined for signed int (as in "C" spec.) and overflow occurs
1479
		pts = (buffer[9] & 0x0e);
1480
		pts <<= 29;
1481
		pts |= (buffer[10] << 22);
1482
		pts |= ((buffer[11] & 0xfe) << 14);
1483
		pts |= (buffer[12] << 7);
1484
		pts |= ((buffer[13] & 0xfe) >> 1);
1485
		t = (uint32_t) (pts / 90);
1486

    
1487
		if (ccx_options.pes_header_to_stdout)
1488
		{
1489
			//printf("# Associated PTS: %d \n", pts);
1490
			printf("# Associated PTS: %" PRId64 " # ", pts);
1491
			printf("Diff: %" PRId64 "\n", pts - last_pes_pts);
1492
			//printf("Diff: %d # ", pts - last_pes_pts);
1493
			last_pes_pts = pts;
1494
		}
1495
	}
1496

    
1497
	/*if (ctx->states.pts_initialized == NO)
1498
	{
1499
		if (utc_refvalue == UINT64_MAX)
1500
			ctx->delta = 0 - (uint64_t)t;
1501
		else
1502
			ctx->delta = (uint64_t) (1000 * utc_refvalue - t);
1503
		ctx->t0 = t;
1504

    
1505
		ctx->states.pts_initialized = YES;
1506
		if ((ctx->using_pts == NO) && (ctx->global_timestamp == 0))
1507
		{
1508
			// We are using global PCR, nevertheless we still have not received valid PCR timestamp yet
1509
			ctx->states.pts_initialized = NO;
1510
		}
1511
	}*/
1512
	if (t < ctx->t0)
1513
		ctx->delta = ctx->last_timestamp;
1514
	ctx->last_timestamp = t + ctx->delta;
1515
	if (ctx->delta < 0 && ctx->last_timestamp > t)
1516
	{
1517
		ctx->last_timestamp = 0;
1518
	}
1519
	ctx->t0 = t;
1520

    
1521
	// skip optional PES header and process each 46-byte teletext packet
1522
	i = 7;
1523
	if (optional_pes_header_included == YES)
1524
		i += 3 + optional_pes_header_length;
1525

    
1526
	while (i <= pes_packet_length - 6)
1527
	{
1528
		uint8_t data_unit_id = buffer[i++];
1529
		uint8_t data_unit_len = buffer[i++];
1530

    
1531
		if ((data_unit_id == DATA_UNIT_EBU_TELETEXT_NONSUBTITLE) || (data_unit_id == DATA_UNIT_EBU_TELETEXT_SUBTITLE))
1532
		{
1533
			// teletext payload has always size 44 bytes
1534
			if (data_unit_len == 44)
1535
			{
1536
				// reverse endianess (via lookup table), ETS 300 706, chapter 7.1
1537
				for (uint8_t j = 0; j < data_unit_len; j++) buffer[i + j] = REVERSE_8[buffer[i + j]];
1538

    
1539
				if (tlt_config.write_format == CCX_OF_RCWT)
1540
					tlt_write_rcwt(dec_ctx, data_unit_id, &buffer[i], ctx->last_timestamp, sub);
1541
				else
1542
				{
1543
					// FIXME: This explicit type conversion could be a problem some day -- do not need to be platform independent
1544
					process_telx_packet(ctx, (data_unit_t) data_unit_id, (teletext_packet_payload_t *)&buffer[i], ctx->last_timestamp, sub);
1545
				}
1546
			}
1547
		}
1548

    
1549
		i += data_unit_len;
1550
	}
1551
	return CCX_OK;
1552
}
1553

    
1554
// Called only when teletext is detected or forced and it's going to be used for extraction.
1555
void* telxcc_init(void)
1556
{
1557
	struct TeletextCtx *ctx = malloc(sizeof(struct TeletextCtx));
1558

    
1559
	if(!ctx)
1560
		return NULL;
1561
	memset (ctx->seen_sub_page, 0, MAX_TLT_PAGES * sizeof(short int));
1562
	memset (ctx->cc_map, 0, 256);
1563

    
1564
	ctx->page_buffer_prev = NULL;
1565
	ctx->page_buffer_cur = NULL;
1566
	ctx->page_buffer_cur_size = 0;
1567
	ctx->page_buffer_cur_used = 0;
1568
	ctx->page_buffer_prev_size = 0;
1569
	ctx->page_buffer_prev_used = 0;
1570
	// Current and previous page compare strings. This is plain text (no colors,
1571
	// tags, etc) in UCS2 (fixed length), so we can compare easily.
1572
	ctx->ucs2_buffer_prev = NULL;
1573
	ctx->ucs2_buffer_cur = NULL;
1574
	ctx->ucs2_buffer_cur_size = 0;
1575
	ctx->ucs2_buffer_cur_used = 0;
1576
	ctx->ucs2_buffer_prev_size = 0;
1577
	ctx->ucs2_buffer_prev_used = 0;
1578

    
1579
	// Buffer timestamp
1580
	ctx->last_timestamp = 0;
1581
	memset(&ctx->page_buffer, 0, sizeof(teletext_page_t));
1582
	ctx->states.programme_info_processed = NO;
1583
	ctx->states.pts_initialized = NO;
1584
	ctx->tlt_packet_counter = 0;
1585
	ctx->transmission_mode = TRANSMISSION_MODE_SERIAL;
1586
	ctx->receiving_data = NO;
1587

    
1588
	ctx->using_pts = UNDEFINED;
1589
	ctx->delta = 0;
1590
	ctx->t0 = 0;
1591

    
1592
	ctx->sentence_cap = 0;
1593
	ctx->new_sentence = 0;
1594
	ctx->splitbysentence = 0;
1595

    
1596
	return ctx;
1597
}
1598

    
1599
void telxcc_update_gt(void *codec, uint32_t global_timestamp)
1600
{
1601
	struct TeletextCtx *ctx = codec;
1602
	ctx->global_timestamp = global_timestamp;
1603
}
1604

    
1605
// Close output
1606
void telxcc_close(void **ctx, struct cc_subtitle *sub)
1607
{
1608
	struct TeletextCtx *ttext = *ctx;
1609

    
1610
	if(!ttext)
1611
		return;
1612

    
1613
	mprint ( "\nTeletext decoder: %"PRIu32" packets processed \n", ttext->tlt_packet_counter);
1614
	if (tlt_config.write_format != CCX_OF_RCWT && sub)
1615
	{
1616
		// output any pending close caption
1617
		if (ttext->page_buffer.tainted == YES)
1618
		{
1619
			// Convert telx to UCS-2 before processing
1620
			for(uint8_t yt = 1; yt <= 23; ++yt)
1621
			{
1622
				for(uint8_t it = 0; it < 40; it++)
1623
				{
1624
					if (ttext->page_buffer.text[yt][it] != 0x00 && ttext->page_buffer.g2_char_present[yt][it] == 0)
1625
						ttext->page_buffer.text[yt][it] = telx_to_ucs2(ttext->page_buffer.text[yt][it]);
1626
				}
1627
			}
1628
			// this time we do not subtract any frames, there will be no more frames
1629
			ttext->page_buffer.hide_timestamp = ttext->last_timestamp;
1630
			process_page(ttext, &ttext->page_buffer, sub);
1631
		}
1632

    
1633
		telxcc_dump_prev_page(ttext, sub);
1634

    
1635
	}
1636
	freep(&ttext->ucs2_buffer_cur);
1637
	freep(&ttext->page_buffer_cur);
1638
	freep(ctx);
1639
}
    (1-1/1)