Libav 0.7.1
|
00001 /* 00002 * ID3v2 header parser 00003 * Copyright (c) 2003 Fabrice Bellard 00004 * 00005 * This file is part of Libav. 00006 * 00007 * Libav is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * Libav is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with Libav; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #include "id3v2.h" 00023 #include "id3v1.h" 00024 #include "libavutil/avstring.h" 00025 #include "libavutil/intreadwrite.h" 00026 #include "libavutil/dict.h" 00027 #include "avio_internal.h" 00028 00029 int ff_id3v2_match(const uint8_t *buf, const char * magic) 00030 { 00031 return buf[0] == magic[0] && 00032 buf[1] == magic[1] && 00033 buf[2] == magic[2] && 00034 buf[3] != 0xff && 00035 buf[4] != 0xff && 00036 (buf[6] & 0x80) == 0 && 00037 (buf[7] & 0x80) == 0 && 00038 (buf[8] & 0x80) == 0 && 00039 (buf[9] & 0x80) == 0; 00040 } 00041 00042 int ff_id3v2_tag_len(const uint8_t * buf) 00043 { 00044 int len = ((buf[6] & 0x7f) << 21) + 00045 ((buf[7] & 0x7f) << 14) + 00046 ((buf[8] & 0x7f) << 7) + 00047 (buf[9] & 0x7f) + 00048 ID3v2_HEADER_SIZE; 00049 if (buf[5] & 0x10) 00050 len += ID3v2_HEADER_SIZE; 00051 return len; 00052 } 00053 00054 static unsigned int get_size(AVIOContext *s, int len) 00055 { 00056 int v = 0; 00057 while (len--) 00058 v = (v << 7) + (avio_r8(s) & 0x7F); 00059 return v; 00060 } 00061 00062 static void read_ttag(AVFormatContext *s, 
AVIOContext *pb, int taglen, const char *key) 00063 { 00064 char *q, dst[512]; 00065 const char *val = NULL; 00066 int len, dstlen = sizeof(dst) - 1; 00067 unsigned genre; 00068 unsigned int (*get)(AVIOContext*) = avio_rb16; 00069 00070 dst[0] = 0; 00071 if (taglen < 1) 00072 return; 00073 00074 taglen--; /* account for encoding type byte */ 00075 00076 switch (avio_r8(pb)) { /* encoding type */ 00077 00078 case ID3v2_ENCODING_ISO8859: 00079 q = dst; 00080 while (taglen-- && q - dst < dstlen - 7) { 00081 uint8_t tmp; 00082 PUT_UTF8(avio_r8(pb), tmp, *q++ = tmp;) 00083 } 00084 *q = 0; 00085 break; 00086 00087 case ID3v2_ENCODING_UTF16BOM: 00088 taglen -= 2; 00089 switch (avio_rb16(pb)) { 00090 case 0xfffe: 00091 get = avio_rl16; 00092 case 0xfeff: 00093 break; 00094 default: 00095 av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key); 00096 return; 00097 } 00098 // fall-through 00099 00100 case ID3v2_ENCODING_UTF16BE: 00101 q = dst; 00102 while (taglen > 1 && q - dst < dstlen - 7) { 00103 uint32_t ch; 00104 uint8_t tmp; 00105 00106 GET_UTF16(ch, ((taglen -= 2) >= 0 ? 
get(pb) : 0), break;) 00107 PUT_UTF8(ch, tmp, *q++ = tmp;) 00108 } 00109 *q = 0; 00110 break; 00111 00112 case ID3v2_ENCODING_UTF8: 00113 len = FFMIN(taglen, dstlen); 00114 avio_read(pb, dst, len); 00115 dst[len] = 0; 00116 break; 00117 default: 00118 av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s.\n", key); 00119 } 00120 00121 if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) 00122 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) 00123 && genre <= ID3v1_GENRE_MAX) 00124 val = ff_id3v1_genre_str[genre]; 00125 else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) { 00126 /* dst now contains two 0-terminated strings */ 00127 dst[dstlen] = 0; 00128 len = strlen(dst); 00129 key = dst; 00130 val = dst + FFMIN(len + 1, dstlen); 00131 } 00132 else if (*dst) 00133 val = dst; 00134 00135 if (val) 00136 av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE); 00137 } 00138 00139 static int is_number(const char *str) 00140 { 00141 while (*str >= '0' && *str <= '9') str++; 00142 return !*str; 00143 } 00144 00145 static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag) 00146 { 00147 AVDictionaryEntry *t; 00148 if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) && 00149 strlen(t->value) == 4 && is_number(t->value)) 00150 return t; 00151 return NULL; 00152 } 00153 00154 static void merge_date(AVDictionary **m) 00155 { 00156 AVDictionaryEntry *t; 00157 char date[17] = {0}; // YYYY-MM-DD hh:mm 00158 00159 if (!(t = get_date_tag(*m, "TYER")) && 00160 !(t = get_date_tag(*m, "TYE"))) 00161 return; 00162 av_strlcpy(date, t->value, 5); 00163 av_dict_set(m, "TYER", NULL, 0); 00164 av_dict_set(m, "TYE", NULL, 0); 00165 00166 if (!(t = get_date_tag(*m, "TDAT")) && 00167 !(t = get_date_tag(*m, "TDA"))) 00168 goto finish; 00169 snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value); 00170 av_dict_set(m, "TDAT", NULL, 0); 00171 av_dict_set(m, "TDA", NULL, 0); 00172 00173 if (!(t = get_date_tag(*m, "TIME")) && 00174 !(t 
= get_date_tag(*m, "TIM"))) 00175 goto finish; 00176 snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2); 00177 av_dict_set(m, "TIME", NULL, 0); 00178 av_dict_set(m, "TIM", NULL, 0); 00179 00180 finish: 00181 if (date[0]) 00182 av_dict_set(m, "date", date, 0); 00183 } 00184 00185 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags) 00186 { 00187 int isv34, tlen, unsync; 00188 char tag[5]; 00189 int64_t next, end = avio_tell(s->pb) + len; 00190 int taghdrlen; 00191 const char *reason = NULL; 00192 AVIOContext pb; 00193 unsigned char *buffer = NULL; 00194 int buffer_size = 0; 00195 00196 switch (version) { 00197 case 2: 00198 if (flags & 0x40) { 00199 reason = "compression"; 00200 goto error; 00201 } 00202 isv34 = 0; 00203 taghdrlen = 6; 00204 break; 00205 00206 case 3: 00207 case 4: 00208 isv34 = 1; 00209 taghdrlen = 10; 00210 break; 00211 00212 default: 00213 reason = "version"; 00214 goto error; 00215 } 00216 00217 unsync = flags & 0x80; 00218 00219 if (isv34 && flags & 0x40) { /* Extended header present, just skip over it */ 00220 int extlen = get_size(s->pb, 4); 00221 if (version == 4) 00222 extlen -= 4; // in v2.4 the length includes the length field we just read 00223 00224 if (extlen < 0) { 00225 reason = "invalid extended header length"; 00226 goto error; 00227 } 00228 avio_skip(s->pb, extlen); 00229 } 00230 00231 while (len >= taghdrlen) { 00232 unsigned int tflags = 0; 00233 int tunsync = 0; 00234 00235 if (isv34) { 00236 avio_read(s->pb, tag, 4); 00237 tag[4] = 0; 00238 if(version==3){ 00239 tlen = avio_rb32(s->pb); 00240 }else 00241 tlen = get_size(s->pb, 4); 00242 tflags = avio_rb16(s->pb); 00243 tunsync = tflags & ID3v2_FLAG_UNSYNCH; 00244 } else { 00245 avio_read(s->pb, tag, 3); 00246 tag[3] = 0; 00247 tlen = avio_rb24(s->pb); 00248 } 00249 if (tlen <= 0 || tlen > len - taghdrlen) { 00250 av_log(s, AV_LOG_WARNING, "Invalid size in frame %s, skipping the rest of tag.\n", tag); 00251 break; 
00252 } 00253 len -= taghdrlen + tlen; 00254 next = avio_tell(s->pb) + tlen; 00255 00256 if (tflags & ID3v2_FLAG_DATALEN) { 00257 avio_rb32(s->pb); 00258 tlen -= 4; 00259 } 00260 00261 if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) { 00262 av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag); 00263 avio_skip(s->pb, tlen); 00264 } else if (tag[0] == 'T') { 00265 if (unsync || tunsync) { 00266 int i, j; 00267 av_fast_malloc(&buffer, &buffer_size, tlen); 00268 if (!buffer) { 00269 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen); 00270 goto seek; 00271 } 00272 for (i = 0, j = 0; i < tlen; i++, j++) { 00273 buffer[j] = avio_r8(s->pb); 00274 if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) { 00275 /* Unsynchronised byte, skip it */ 00276 j--; 00277 } 00278 } 00279 ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL); 00280 read_ttag(s, &pb, j, tag); 00281 } else { 00282 read_ttag(s, s->pb, tlen, tag); 00283 } 00284 } 00285 else if (!tag[0]) { 00286 if (tag[1]) 00287 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding"); 00288 avio_skip(s->pb, tlen); 00289 break; 00290 } 00291 /* Skip to end of tag */ 00292 seek: 00293 avio_seek(s->pb, next, SEEK_SET); 00294 } 00295 00296 if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */ 00297 end += 10; 00298 00299 error: 00300 if (reason) 00301 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason); 00302 avio_seek(s->pb, end, SEEK_SET); 00303 av_free(buffer); 00304 return; 00305 } 00306 00307 void ff_id3v2_read(AVFormatContext *s, const char *magic) 00308 { 00309 int len, ret; 00310 uint8_t buf[ID3v2_HEADER_SIZE]; 00311 int found_header; 00312 int64_t off; 00313 00314 do { 00315 /* save the current offset in case there's nothing to read/skip */ 00316 off = avio_tell(s->pb); 00317 ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE); 00318 if (ret != ID3v2_HEADER_SIZE) 00319 break; 00320 found_header = 
ff_id3v2_match(buf, magic); 00321 if (found_header) { 00322 /* parse ID3v2 header */ 00323 len = ((buf[6] & 0x7f) << 21) | 00324 ((buf[7] & 0x7f) << 14) | 00325 ((buf[8] & 0x7f) << 7) | 00326 (buf[9] & 0x7f); 00327 ff_id3v2_parse(s, len, buf[3], buf[5]); 00328 } else { 00329 avio_seek(s->pb, off, SEEK_SET); 00330 } 00331 } while (found_header); 00332 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv); 00333 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_2_metadata_conv); 00334 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv); 00335 merge_date(&s->metadata); 00336 } 00337 00338 const AVMetadataConv ff_id3v2_34_metadata_conv[] = { 00339 { "TALB", "album"}, 00340 { "TCOM", "composer"}, 00341 { "TCON", "genre"}, 00342 { "TCOP", "copyright"}, 00343 { "TENC", "encoded_by"}, 00344 { "TIT2", "title"}, 00345 { "TLAN", "language"}, 00346 { "TPE1", "artist"}, 00347 { "TPE2", "album_artist"}, 00348 { "TPE3", "performer"}, 00349 { "TPOS", "disc"}, 00350 { "TPUB", "publisher"}, 00351 { "TRCK", "track"}, 00352 { "TSSE", "encoder"}, 00353 { 0 } 00354 }; 00355 00356 const AVMetadataConv ff_id3v2_4_metadata_conv[] = { 00357 { "TDRL", "date"}, 00358 { "TDRC", "date"}, 00359 { "TDEN", "creation_time"}, 00360 { "TSOA", "album-sort"}, 00361 { "TSOP", "artist-sort"}, 00362 { "TSOT", "title-sort"}, 00363 { 0 } 00364 }; 00365 00366 const AVMetadataConv ff_id3v2_2_metadata_conv[] = { 00367 { "TAL", "album"}, 00368 { "TCO", "genre"}, 00369 { "TT2", "title"}, 00370 { "TEN", "encoded_by"}, 00371 { "TP1", "artist"}, 00372 { "TP2", "album_artist"}, 00373 { "TP3", "performer"}, 00374 { "TRK", "track"}, 00375 { 0 } 00376 }; 00377 00378 00379 const char ff_id3v2_tags[][4] = { 00380 "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDLY", "TENC", "TEXT", 00381 "TFLT", "TIT1", "TIT2", "TIT3", "TKEY", "TLAN", "TLEN", "TMED", 00382 "TOAL", "TOFN", "TOLY", "TOPE", "TOWN", "TPE1", "TPE2", "TPE3", 00383 "TPE4", "TPOS", "TPUB", "TRCK", "TRSN", "TRSO", "TSRC", "TSSE", 00384 { 0 }, 00385 }; 

/* Text frame identifiers listed for ID3v2.4 (going by the table name).
 * Entries are exactly 4 bytes and NOT NUL-terminated; the list ends with
 * an all-zero entry. */
const char ff_id3v2_4_tags[][4] = {
   "TDEN", "TDOR", "TDRC", "TDRL", "TDTG", "TIPL", "TMCL", "TMOO",
   "TPRO", "TSOA", "TSOP", "TSOT", "TSST",
   { 0 },
};

/* Text frame identifiers listed for ID3v2.3 (going by the table name).
 * Entries are exactly 4 bytes and NOT NUL-terminated; the list ends with
 * an all-zero entry. */
const char ff_id3v2_3_tags[][4] = {
   "TDAT", "TIME", "TORY", "TRDA", "TSIZ", "TYER",
   { 0 },
};