Libav 0.7.1
|
00001 /* 00002 * audio encoder psychoacoustic model 00003 * Copyright (C) 2008 Konstantin Shishkov 00004 * 00005 * This file is part of Libav. 00006 * 00007 * Libav is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * Libav is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with Libav; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #ifndef AVCODEC_PSYMODEL_H 00023 #define AVCODEC_PSYMODEL_H 00024 00025 #include "avcodec.h" 00026 00028 #define PSY_MAX_BANDS 128 00029 00030 #define PSY_MAX_CHANS 20 00031 00035 typedef struct FFPsyBand { 00036 int bits; 00037 float energy; 00038 float threshold; 00039 float distortion; 00040 float perceptual_weight; 00041 } FFPsyBand; 00042 00046 typedef struct FFPsyWindowInfo { 00047 int window_type[3]; 00048 int window_shape; 00049 int num_windows; 00050 int grouping[8]; 00051 int *window_sizes; 00052 } FFPsyWindowInfo; 00053 00057 typedef struct FFPsyContext { 00058 AVCodecContext *avctx; 00059 const struct FFPsyModel *model; 00060 00061 FFPsyBand *psy_bands; 00062 00063 uint8_t **bands; 00064 int *num_bands; 00065 int num_lens; 00066 00067 float pe[PSY_MAX_CHANS]; 00068 00069 struct { 00070 int size; 00071 int bits; 00072 } bitres; 00073 00074 void* model_priv_data; 00075 } FFPsyContext; 00076 00080 typedef struct FFPsyModel { 00081 const char *name; 00082 int (*init) (FFPsyContext *apc); 00083 00095 FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); 00096 00105 void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, const FFPsyWindowInfo *wi); 00106 00107 void (*end) (FFPsyContext *apc); 00108 } FFPsyModel; 00109 00121 av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, 00122 int num_lens, 00123 const uint8_t **bands, const int* num_bands); 00124 00130 av_cold void ff_psy_end(FFPsyContext *ctx); 00131 00132 00133 /************************************************************************** 00134 * Audio preprocessing stuff. * 00135 * This should be moved into some audio filter eventually. * 00136 **************************************************************************/ 00137 struct FFPsyPreprocessContext; 00138 00142 av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx); 00143 00153 void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, 00154 const int16_t *audio, int16_t *dest, 00155 int tag, int channels); 00156 00160 av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx); 00161 00162 #endif /* AVCODEC_PSYMODEL_H */