aacenc_lib.h 86 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709
  1. /* -----------------------------------------------------------------------------
  2. Software License for The Fraunhofer FDK AAC Codec Library for Android
  3. © Copyright 1995 - 2021 Fraunhofer-Gesellschaft zur Förderung der angewandten
  4. Forschung e.V. All rights reserved.
  5. 1. INTRODUCTION
  6. The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
  7. that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
  8. scheme for digital audio. This FDK AAC Codec software is intended to be used on
  9. a wide variety of Android devices.
  10. AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
  11. general perceptual audio codecs. AAC-ELD is considered the best-performing
  12. full-bandwidth communications codec by independent studies and is widely
  13. deployed. AAC has been standardized by ISO and IEC as part of the MPEG
  14. specifications.
  15. Patent licenses for necessary patent claims for the FDK AAC Codec (including
  16. those of Fraunhofer) may be obtained through Via Licensing
  17. (www.vialicensing.com) or through the respective patent owners individually for
  18. the purpose of encoding or decoding bit streams in products that are compliant
  19. with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
  20. Android devices already license these patent claims through Via Licensing or
  21. directly from the patent owners, and therefore FDK AAC Codec software may
  22. already be covered under those patent licenses when it is used for those
  23. licensed purposes only.
  24. Commercially-licensed AAC software libraries, including floating-point versions
  25. with enhanced sound quality, are also available from Fraunhofer. Users are
  26. encouraged to check the Fraunhofer website for additional applications
  27. information and documentation.
  28. 2. COPYRIGHT LICENSE
  29. Redistribution and use in source and binary forms, with or without modification,
  30. are permitted without payment of copyright license fees provided that you
  31. satisfy the following conditions:
  32. You must retain the complete text of this software license in redistributions of
  33. the FDK AAC Codec or your modifications thereto in source code form.
  34. You must retain the complete text of this software license in the documentation
  35. and/or other materials provided with redistributions of the FDK AAC Codec or
  36. your modifications thereto in binary form. You must make available free of
  37. charge copies of the complete source code of the FDK AAC Codec and your
  38. modifications thereto to recipients of copies in binary form.
  39. The name of Fraunhofer may not be used to endorse or promote products derived
  40. from this library without prior written permission.
  41. You may not charge copyright license fees for anyone to use, copy or distribute
  42. the FDK AAC Codec software or your modifications thereto.
  43. Your modified versions of the FDK AAC Codec must carry prominent notices stating
  44. that you changed the software and the date of any change. For modified versions
  45. of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
  46. must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
  47. AAC Codec Library for Android."
  48. 3. NO PATENT LICENSE
  49. NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
  50. limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
  51. Fraunhofer provides no warranty of patent non-infringement with respect to this
  52. software.
  53. You may use this FDK AAC Codec software or modifications thereto only for
  54. purposes that are authorized by appropriate patent licenses.
  55. 4. DISCLAIMER
  56. This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
  57. holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
  58. including but not limited to the implied warranties of merchantability and
  59. fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
  60. CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
  61. or consequential damages, including but not limited to procurement of substitute
  62. goods or services; loss of use, data, or profits, or business interruption,
  63. however caused and on any theory of liability, whether in contract, strict
  64. liability, or tort (including negligence), arising in any way out of the use of
  65. this software, even if advised of the possibility of such damage.
  66. 5. CONTACT INFORMATION
  67. Fraunhofer Institute for Integrated Circuits IIS
  68. Attention: Audio and Multimedia Departments - FDK AAC LL
  69. Am Wolfsmantel 33
  70. 91058 Erlangen, Germany
  71. www.iis.fraunhofer.de/amm
  72. amm-info@iis.fraunhofer.de
  73. ----------------------------------------------------------------------------- */
  74. /**************************** AAC encoder library ******************************
  75. Author(s): M. Lohwasser
  76. Description:
  77. *******************************************************************************/
  78. /**
  79. * \file aacenc_lib.h
  80. * \brief FDK AAC Encoder library interface header file.
  81. *
  82. \mainpage Introduction
  83. \section Scope
  84. This document describes the high-level interface and usage of the ISO/MPEG-2/4
  85. AAC Encoder library developed by the Fraunhofer Institute for Integrated
  86. Circuits (IIS).
  87. The library implements encoding on the basis of the MPEG-2 and MPEG-4 AAC
  88. Low-Complexity standard, and depending on the library's configuration, MPEG-4
  89. High-Efficiency AAC v2 and/or AAC-ELD standard.
  90. All references to SBR (Spectral Band Replication) are only applicable to HE-AAC
  91. or AAC-ELD versions of the library. All references to PS (Parametric Stereo) are
  92. only applicable to HE-AAC v2 versions of the library.
  93. \section encBasics Encoder Basics
  94. This document can only give a rough overview of the ISO/MPEG-2 and ISO/MPEG-4
  95. AAC audio coding standard. To understand all the terms in this document, you are
  96. encouraged to read the following documents.
  97. - ISO/IEC 13818-7 (MPEG-2 AAC), which defines the syntax of MPEG-2 AAC audio
  98. bitstreams.
  99. - ISO/IEC 14496-3 (MPEG-4 AAC, subparts 1 and 4), which defines the syntax of
  100. MPEG-4 AAC audio bitstreams.
  101. - Lutzky, Schuller, Gayer, Krämer, Wabnik, "A guideline to audio codec
  102. delay", 116th AES Convention, May 8, 2004
  103. MPEG Advanced Audio Coding is based on a time-to-frequency mapping of the
  104. signal. The signal is partitioned into overlapping portions and transformed into
  105. frequency domain. The spectral components are then quantized and coded. \n An
  106. MPEG-2 or MPEG-4 AAC audio bitstream is composed of frames. Contrary to MPEG-1/2
  107. Layer-3 (mp3), the length of individual frames is not restricted to a fixed
  108. number of bytes, but can take on any length between 1 and 768 bytes.
  109. \page LIBUSE Library Usage
  110. \section InterfaceDescription API Files
  111. All API header files are located in the folder /include of the release package.
  112. All header files are provided for usage in C/C++ programs. The AAC encoder
  113. library API functions are located in aacenc_lib.h.
  114. \section CallingSequence Calling Sequence
  115. For encoding of ISO/MPEG-2/4 AAC bitstreams the following sequence is mandatory.
  116. Input read and output write functions as well as the corresponding open and
  117. close functions are left out, since they may be implemented differently
  118. according to the user's specific requirements. The example implementation uses
  119. file-based input/output.
  120. -# Call aacEncOpen() to allocate encoder instance with required \ref encOpen
  121. "configuration". \code HANDLE_AACENCODER hAacEncoder = NULL; if ( (ErrorStatus =
  122. aacEncOpen(&hAacEncoder,0,0)) != AACENC_OK ) { \endcode
  123. -# Call aacEncoder_SetParam() for each parameter to be set. AOT, samplingrate,
  124. channelMode, bitrate and transport type are \ref encParams "mandatory". \code
  125. ErrorStatus = aacEncoder_SetParam(hAacEncoder, parameter, value);
  126. \endcode
  127. -# Call aacEncEncode() with NULL parameters to \ref encReconf "initialize"
  128. encoder instance with present parameter set. \code ErrorStatus =
  129. aacEncEncode(hAacEncoder, NULL, NULL, NULL, NULL); \endcode
  130. -# Call aacEncInfo() to retrieve a configuration data block to be transmitted
  131. out of band. This is required when using RFC3640 or RFC3016 like transport.
  132. \code
  133. AACENC_InfoStruct encInfo;
  134. aacEncInfo(hAacEncoder, &encInfo);
  135. \endcode
  136. -# Encode input audio data in loop.
  137. \code
  138. do
  139. {
  140. \endcode
  141. Feed \ref feedInBuf "input buffer" with new audio data and provide input/output
  142. \ref bufDes "arguments" to aacEncEncode(). \code ErrorStatus =
  143. aacEncEncode(hAacEncoder, &inBufDesc, &outBufDesc, &inargs, &outargs); \endcode
  144. Write \ref writeOutData "output data" to file or audio device.
  145. \code
  146. } while (ErrorStatus==AACENC_OK);
  147. \endcode
  148. -# Call aacEncClose() and destroy encoder instance.
  149. \code
  150. aacEncClose(&hAacEncoder);
  151. \endcode
  152. \section encOpen Encoder Instance Allocation
  153. The aacEncOpen() function is very flexible and can be used in
  154. the following ways.
  155. - If the amount of memory consumption is not an issue, the encoder instance can
  156. be allocated for the maximum number of possible audio channels (for example 6 or
  157. 8) with the full functional range supported by the library. This is the default
  158. open procedure for the AAC encoder if memory consumption does not need to be
  159. minimized. \code aacEncOpen(&hAacEncoder,0,0) \endcode
  160. - If the required MPEG-4 AOTs do not call for the full functional range of the
  161. library, encoder modules can be allocated selectively. \verbatim
  162. ------------------------------------------------------
  163. AAC | SBR | PS | MD | FLAGS | value
  164. -----+-----+-----+----+-----------------------+-------
  165. X | - | - | - | (0x01) | 0x01
  166. X | X | - | - | (0x01|0x02) | 0x03
  167. X | X | X | - | (0x01|0x02|0x04) | 0x07
  168. X | - | - | X | (0x01 |0x10) | 0x11
  169. X | X | - | X | (0x01|0x02 |0x10) | 0x13
  170. X | X | X | X | (0x01|0x02|0x04|0x10) | 0x17
  171. ------------------------------------------------------
  172. - AAC: Allocate AAC Core Encoder module.
  173. - SBR: Allocate Spectral Band Replication module.
  174. - PS: Allocate Parametric Stereo module.
  175. - MD: Allocate Meta Data module within AAC encoder.
  176. \endverbatim
  177. \code aacEncOpen(&hAacEncoder,value,0) \endcode
  178. - Specifying the maximum number of channels to be supported in the encoder
  179. instance can be done as follows.
  180. - For example allocate an encoder instance which supports 2 channels for all
  181. supported AOTs. The library itself may be capable of encoding up to 6 or 8
  182. channels but in this example only 2 channel encoding is required and thus only
  183. buffers for 2 channels are allocated to save data memory. \code
  184. aacEncOpen(&hAacEncoder,0,2) \endcode
  185. - Additionally the maximum number of supported channels in the SBR module can
  186. be denoted separately.\n In this example the encoder instance provides a maximum
  187. of 6 channels out of which up to 2 channels support SBR. This encoder instance
  188. can produce for example 5.1 channel AAC-LC streams or stereo HE-AAC (v2)
  189. streams. HE-AAC 5.1 multi channel is not possible since only 2 out of 6 channels
  190. support SBR, which saves data memory. \code aacEncOpen(&hAacEncoder,0,6|(2<<8))
  191. \endcode \n
  192. \section bufDes Input/Output Arguments
  193. \subsection allocIOBufs Provide Buffer Descriptors
  194. In the present encoder API, the input and output buffers are described with \ref
  195. AACENC_BufDesc "buffer descriptors". This mechanism allows a flexible handling
  196. of input and output buffers without impact to the actual encoding call. Optional
  197. buffers are necessary e.g. for ancillary data, meta data input or additional
  198. output buffers describing superframing data in DAB+ or DRM+.\n At least one
  199. input buffer for audio input data and one output buffer for bitstream data must
  200. be allocated. The input buffer size can be a user defined multiple of the number
  201. of input channels. PCM input data will be copied from the user defined PCM
  202. buffer to an internal input buffer and so input data can be less than one AAC
  203. audio frame. The output buffer size should be 6144 bits per channel excluding
  204. the LFE channel. If the output data does not fit into the provided buffer, an
  205. AACENC_ERROR will be returned by aacEncEncode(). \code static INT_PCM
  206. inputBuffer[8*2048]; static UCHAR ancillaryBuffer[50]; static
  207. AACENC_MetaData metaDataSetup; static UCHAR outputBuffer[8192];
  208. \endcode
  209. All input and output buffers must be clustered in input and output buffer arrays.
  210. \code
  211. static void* inBuffer[] = { inputBuffer, ancillaryBuffer, &metaDataSetup
  212. }; static INT inBufferIds[] = { IN_AUDIO_DATA, IN_ANCILLRY_DATA,
  213. IN_METADATA_SETUP }; static INT inBufferSize[] = { sizeof(inputBuffer),
  214. sizeof(ancillaryBuffer), sizeof(metaDataSetup) }; static INT inBufferElSize[]
  215. = { sizeof(INT_PCM), sizeof(UCHAR), sizeof(AACENC_MetaData) };
  216. static void* outBuffer[] = { outputBuffer };
  217. static INT outBufferIds[] = { OUT_BITSTREAM_DATA };
  218. static INT outBufferSize[] = { sizeof(outputBuffer) };
  219. static INT outBufferElSize[] = { sizeof(UCHAR) };
  220. \endcode
  221. Allocate buffer descriptors
  222. \code
  223. AACENC_BufDesc inBufDesc;
  224. AACENC_BufDesc outBufDesc;
  225. \endcode
  226. Initialize input buffer descriptor
  227. \code
  228. inBufDesc.numBufs = sizeof(inBuffer)/sizeof(void*);
  229. inBufDesc.bufs = (void**)&inBuffer;
  230. inBufDesc.bufferIdentifiers = inBufferIds;
  231. inBufDesc.bufSizes = inBufferSize;
  232. inBufDesc.bufElSizes = inBufferElSize;
  233. \endcode
  234. Initialize output buffer descriptor
  235. \code
  236. outBufDesc.numBufs = sizeof(outBuffer)/sizeof(void*);
  237. outBufDesc.bufs = (void**)&outBuffer;
  238. outBufDesc.bufferIdentifiers = outBufferIds;
  239. outBufDesc.bufSizes = outBufferSize;
  240. outBufDesc.bufElSizes = outBufferElSize;
  241. \endcode
  242. \subsection argLists Provide Input/Output Argument Lists
  243. The input and output arguments of an aacEncEncode() call are described in
  244. argument structures. \code AACENC_InArgs inargs; AACENC_OutArgs outargs;
  245. \endcode
  246. \section feedInBuf Feed Input Buffer
  247. The input buffer should be handled as a modulo buffer. New audio data in the
  248. form of pulse-code-modulated samples (PCM) must be read from an external source and be
  249. fed to the input buffer depending on its fill level. The required sample bit depth
  250. (represented by the data type INT_PCM which is 16, 24 or 32 bits wide) is fixed
  251. and depends on library configuration (usually 16 bit). \code inargs.numInSamples
  252. += WAV_InputRead ( wavIn, &inputBuffer[inargs.numInSamples],
  253. FDKmin(encInfo.inputChannels*encInfo.frameLength,
  254. sizeof(inputBuffer) /
  255. sizeof(INT_PCM)-inargs.numInSamples),
  256. SAMPLE_BITS
  257. );
  258. \endcode
  259. After the encoder's internal buffer is fed with incoming audio samples, and
  260. aacEncEncode() processed the new input data, update/move remaining samples in
  261. input buffer, simulating a modulo buffer: \code if (outargs.numInSamples>0) {
  262. FDKmemmove( inputBuffer,
  263. &inputBuffer[outargs.numInSamples],
  264. sizeof(INT_PCM)*(inargs.numInSamples-outargs.numInSamples) );
  265. inargs.numInSamples -= outargs.numInSamples;
  266. }
  267. \endcode
  268. \section writeOutData Output Bitstream Data
  269. If any AAC bitstream data is available, write it to output file or device as
  270. follows. \code if (outargs.numOutBytes>0) { FDKfwrite(outputBuffer,
  271. outargs.numOutBytes, 1, pOutFile);
  272. }
  273. \endcode
  274. \section cfgMetaData Meta Data Configuration
  275. If the present library is configured with Metadata support, it is possible to
  276. insert meta data side info into the generated audio bitstream while encoding.
  277. To work with meta data the encoder instance has to be \ref encOpen "allocated"
  278. with meta data support. The meta data mode must be configured with the
  279. ::AACENC_METADATA_MODE parameter and aacEncoder_SetParam() function. \code
  280. aacEncoder_SetParam(hAacEncoder, AACENC_METADATA_MODE, 0-3); \endcode
  281. This configuration indicates how to embed meta data into the bitstream: either no
  282. insertion, MPEG or ETSI style. The meta data itself must be specified within the
  283. meta data setup structure AACENC_MetaData.
  284. Changing one of the AACENC_MetaData setup parameters can be achieved from
  285. outside the library within ::IN_METADATA_SETUP input buffer. There is no need to
  286. supply meta data setup structure every frame. If there is no new meta setup data
  287. available, the encoder uses the previous setup or the default configuration in
  288. initial state.
  289. In general the audio compressor and limiter within the encoder library can be
  290. configured with the ::AACENC_METADATA_DRC_PROFILE parameter
  291. AACENC_MetaData::drc_profile and AACENC_MetaData::comp_profile.
  292. \n
  293. \section encReconf Encoder Reconfiguration
  294. The encoder library allows reconfiguration of the encoder instance with new
  295. settings continuously between encoding frames. Each parameter to be changed must
  296. be set with a single aacEncoder_SetParam() call. The internal status of each
  297. parameter can be retrieved with an aacEncoder_GetParam() call.\n There is no
  298. stand-alone reconfiguration function available. When parameters were modified
  299. from outside the library, an internal control mechanism triggers the necessary
  300. reconfiguration process which will be applied at the beginning of the following
  301. aacEncEncode() call. This state can be observed externally via the
  302. AACENC_INIT_STATUS and aacEncoder_GetParam() function. The reconfiguration
  303. process can also be applied immediately when all parameters of an aacEncEncode()
  304. call are NULL with a valid encoder handle.\n\n The internal reconfiguration
  305. process can be controlled externally as follows. \code
  306. aacEncoder_SetParam(hAacEncoder, AACENC_CONTROL_STATE, AACENC_CTRLFLAGS);
  307. \endcode
  308. \section encParams Encoder Parametrization
  309. All parameters listed in ::AACENC_PARAM can be modified within an encoder
  310. instance.
  311. \subsection encMandatory Mandatory Encoder Parameters
  312. The following parameters must be specified when the encoder instance is
  313. initialized. \code aacEncoder_SetParam(hAacEncoder, AACENC_AOT, value);
  314. aacEncoder_SetParam(hAacEncoder, AACENC_BITRATE, value);
  315. aacEncoder_SetParam(hAacEncoder, AACENC_SAMPLERATE, value);
  316. aacEncoder_SetParam(hAacEncoder, AACENC_CHANNELMODE, value);
  317. \endcode
  318. Beyond that is an internal auto mode which preinitializes the ::AACENC_BITRATE
  319. parameter if the parameter was not set externally. The bitrate depends on the
  320. number of effective channels and sampling rate and is determined as follows.
  321. \code
  322. AAC-LC (AOT_AAC_LC): 1.5 bits per sample
  323. HE-AAC (AOT_SBR): 0.625 bits per sample (dualrate sbr)
  324. HE-AAC (AOT_SBR): 1.125 bits per sample (downsampled sbr)
  325. HE-AAC v2 (AOT_PS): 0.5 bits per sample
  326. \endcode
  327. \subsection channelMode Channel Mode Configuration
  328. The input audio data is described with the ::AACENC_CHANNELMODE parameter in the
  329. aacEncoder_SetParam() call. It is not possible to use the encoder instance with
  330. a 'number of input channels' argument. Instead, the channelMode must be set as
  331. follows. \code aacEncoder_SetParam(hAacEncoder, AACENC_CHANNELMODE, value);
  332. \endcode The parameter is specified in ::CHANNEL_MODE and can be mapped from the
  333. number of input channels in the following way. \code CHANNEL_MODE chMode =
  334. MODE_INVALID;
  335. switch (nChannels) {
  336. case 1: chMode = MODE_1; break;
  337. case 2: chMode = MODE_2; break;
  338. case 3: chMode = MODE_1_2; break;
  339. case 4: chMode = MODE_1_2_1; break;
  340. case 5: chMode = MODE_1_2_2; break;
  341. case 6: chMode = MODE_1_2_2_1; break;
  342. case 7: chMode = MODE_6_1; break;
  343. case 8: chMode = MODE_7_1_BACK; break;
  344. default:
  345. chMode = MODE_INVALID;
  346. }
  347. return chMode;
  348. \endcode
  349. \subsection peakbitrate Peak Bitrate Configuration
  350. In AAC, the default bitreservoir configuration depends on the chosen bitrate per
  351. frame and the number of effective channels. The size can be determined as below.
  352. \f[
  353. bitreservoir = nEffChannels*6144 - (bitrate*framelength/samplerate)
  354. \f]
  355. Due to audio quality concerns it is not recommended to change the bitreservoir
  356. size to a lower value than the default setting! However, for minimizing the
  357. delay for streaming applications or for achieving a constant size of the
  358. bitstream packages in each frame, it may be necessary to limit the maximum bits
  359. per frame size. This can be done with the ::AACENC_PEAK_BITRATE parameter. \code
  360. aacEncoder_SetParam(hAacEncoder, AACENC_PEAK_BITRATE, value);
  361. \endcode
  362. To achieve acceptable audio quality with a reduced bitreservoir size setting at
  363. least 1000 bits per audio channel is recommended. For a multichannel audio file
  364. with 5.1 channels the bitreservoir reduced to 5000 bits results in acceptable
  365. audio quality.
  366. \subsection vbrmode Variable Bitrate Mode
  367. The variable bitrate (VBR) mode coding adapts the bit consumption to the
  368. psychoacoustic requirements of the signal. The encoder ignores the user-defined
  369. bit rate and selects a suitable pre-defined configuration based on the provided
  370. AOT. The VBR mode 1 is tuned for HE-AACv2, for VBR mode 2, HE-AACv1 should be
  371. used. VBR modes 3-5 should be used with Low-Complexity AAC. When encoding
  372. AAC-ELD, the best mode is selected automatically.
  373. The bitrates given in the table are averages over time and different encoder
  374. settings. They strongly depend on the type of audio signal. The VBR
  375. configurations can be adjusted with the ::AACENC_BITRATEMODE encoder parameter.
  376. \verbatim
  377. -----------------------------------------------
  378. VBR_MODE | Approx. Bitrate in kbps for stereo
  379. | AAC-LC | AAC-ELD
  380. ----------+---------------+--------------------
  381. VBR_1 | 32 (HE-AACv2) | 48
  382. VBR_2 | 72 (HE-AACv1) | 56
  383. VBR_3 | 112 | 72
  384. VBR_4 | 148 | 148
  385. VBR_5 | 228 | 224
  386. --------------------------------------------
  387. \endverbatim
  388. Note that these figures are valid for stereo encoding only. VBR modes 2-5 will
  389. yield much lower bit rates when encoding single-channel input. For
  390. configurations which are making use of downmix modules the AAC core channels
  391. respectively downmix channels shall be considered.
  392. \subsection encQual Audio Quality Considerations
  393. The default encoder configuration is suggested to be used. Encoder tools such as
  394. TNS and PNS are activated by default and are internally controlled (see \ref
  395. BEHAVIOUR_TOOLS).
  396. There is an additional quality parameter called ::AACENC_AFTERBURNER. In the
  397. default configuration this quality switch is deactivated because it would cause
  398. a workload increase which might be significant. If workload is not an issue in
  399. the application we recommend activating this feature. \code
  400. aacEncoder_SetParam(hAacEncoder, AACENC_AFTERBURNER, 0/1); \endcode
  401. \subsection encELD ELD Auto Configuration Mode
  402. For ELD configuration a so called auto configurator is available which
  403. configures SBR and the SBR ratio by itself. The configurator is used when the
  404. encoder parameter ::AACENC_SBR_MODE and ::AACENC_SBR_RATIO are not set
  405. explicitly.
  406. Based on sampling rate and chosen bitrate a reasonable SBR configuration will be
  407. used. \verbatim
  408. ------------------------------------------------------------------
  409. Sampling Rate | Total Bitrate | No. of | SBR | SBR Ratio
  410. [kHz] | [bit/s] | Chan | |
  411. | | | |
  412. ---------------+-----------------+--------+-----+-----------------
  413. ]min, 16[ | min - max | 1 | off | ---
  414. ---------------+-----------------+--------------+-----------------
  415. [16] | min - 27999 | 1 | on | downsampled SBR
  416. | 28000 - max | 1 | off | ---
  417. ---------------+-----------------+--------------+-----------------
  418. ]16 - 24] | min - 39999 | 1 | on | downsampled SBR
  419. | 40000 - max | 1 | off | ---
  420. ---------------+-----------------+--------------+-----------------
  421. ]24 - 32] | min - 27999 | 1 | on | dualrate SBR
  422. | 28000 - 55999 | 1 | on | downsampled SBR
  423. | 56000 - max | 1 | off | ---
  424. ---------------+-----------------+--------------+-----------------
  425. ]32 - 44.1] | min - 63999 | 1 | on | dualrate SBR
  426. | 64000 - max | 1 | off | ---
  427. ---------------+-----------------+--------------+-----------------
  428. ]44.1 - 48] | min - 63999 | 1 | on | dualrate SBR
  429. | 64000 - max | 1 | off | ---
  430. | | | |
  431. ---------------+-----------------+--------+-----+-----------------
  432. ]min, 16[ | min - max | 2 | off | ---
  433. ---------------+-----------------+--------------+-----------------
  434. [16] | min - 31999 | 2 | on | downsampled SBR
  435. | 32000 - 63999 | 2 | on | downsampled SBR
  436. | 64000 - max | 2 | off | ---
  437. ---------------+-----------------+--------------+-----------------
  438. ]16 - 24] | min - 47999 | 2 | on | downsampled SBR
  439. | 48000 - 79999 | 2 | on | downsampled SBR
  440. | 80000 - max | 2 | off | ---
  441. ---------------+-----------------+--------------+-----------------
  442. ]24 - 32] | min - 31999 | 2 | on | dualrate SBR
  443. | 32000 - 67999 | 2 | on | dualrate SBR
  444. | 68000 - 95999 | 2 | on | downsampled SBR
  445. | 96000 - max | 2 | off | ---
  446. ---------------+-----------------+--------------+-----------------
  447. ]32 - 44.1] | min - 43999 | 2 | on | dualrate SBR
  448. | 44000 - 127999 | 2 | on | dualrate SBR
  449. | 128000 - max | 2 | off | ---
  450. ---------------+-----------------+--------------+-----------------
  451. ]44.1 - 48] | min - 43999 | 2 | on | dualrate SBR
  452. | 44000 - 127999 | 2 | on | dualrate SBR
  453. | 128000 - max | 2 | off | ---
  454. | | |
  455. ------------------------------------------------------------------
  456. \endverbatim
  457. \subsection encDsELD Reduced Delay (Downscaled) Mode
  458. The downscaled mode of AAC-ELD reduces the algorithmic delay of AAC-ELD by
  459. virtually increasing the sampling rate. When using the downscaled mode, the
  460. bitrate should be increased for keeping the same audio quality level. For common
  461. signals, the bitrate should be increased by 25% for a downscale factor of 2.
  462. Currently, downscaling factors 2 and 4 are supported.
  463. To enable the downscaled mode in the encoder, the framelength parameter
  464. AACENC_GRANULE_LENGTH must be set to 256 or 240 for a downscale factor of 2,
  465. or to 128 or 120 for a downscale factor of 4. The default values of 512
  466. or 480 mean that no downscaling is applied. \code
  467. aacEncoder_SetParam(hAacEncoder, AACENC_GRANULE_LENGTH, 256);
  468. aacEncoder_SetParam(hAacEncoder, AACENC_GRANULE_LENGTH, 128);
  469. \endcode
  470. Downscaled bitstreams are fully backwards compatible. However, the legacy
  471. decoder needs to support high sample rate, e.g. 96kHz. The signaled sampling
  472. rate is multiplied by the downscale factor. Although not required, downscaling
  473. should be applied when decoding downscaled bitstreams. It reduces CPU workload
  474. and the output will have the same sampling rate as the input. In an ideal
  475. configuration both encoder and decoder should run with the same downscale
  476. factor.
  477. The following table shows approximate filter bank delays in ms for common
  478. sampling rates (sr) at framesize (fs) and downscale factor (dsf), based on this
  479. formula: \f[ 1000 * 1.5 * fs / (dsf * sr) \f]
  480. \verbatim
  481. --------------------------------------
  482. | 512/2 | 512/4 | 480/2 | 480/4
  483. ------+-------+-------+-------+-------
  484. 22050 | 17.41 | 8.71 | 16.33 | 8.16
  485. 32000 | 12.00 | 6.00 | 11.25 | 5.62
  486. 44100 | 8.71 | 4.35 | 8.16 | 4.08
  487. 48000 | 8.00 | 4.00 | 7.50 | 3.75
  488. --------------------------------------
  489. \endverbatim
  490. \section audiochCfg Audio Channel Configuration
  491. The MPEG standard refers often to the so-called Channel Configuration. This
  492. Channel Configuration is used for a fixed Channel Mapping. The configurations
  493. 1-7 and 11,12,14 are predefined in MPEG standard and used for implicit
  494. signalling within the encoded bitstream. For user defined Configurations the
  495. Channel Configuration is set to 0 and the Channel Mapping must be explicitly
  496. described with an appropriate Program Config Element. The present Encoder
  497. implementation does not allow the user to configure this Channel Configuration
  498. externally. The Encoder implementation supports fixed Channel Modes which are
  499. mapped to Channel Configuration as follows. \verbatim
  500. ----------------------------------------------------------------------------------------
  501. ChannelMode            | ChCfg | Height | front_El      | side_El  | back_El  | lfe_El
  502. -----------------------+-------+--------+---------------+----------+----------+---------
  503. MODE_1                 | 1     | NORM   | SCE           |          |          |
  504. MODE_2                 | 2     | NORM   | CPE           |          |          |
  505. MODE_1_2               | 3     | NORM   | SCE, CPE      |          |          |
  506. MODE_1_2_1             | 4     | NORM   | SCE, CPE      |          | SCE      |
  507. MODE_1_2_2             | 5     | NORM   | SCE, CPE      |          | CPE      |
  508. MODE_1_2_2_1           | 6     | NORM   | SCE, CPE      |          | CPE      | LFE
  509. MODE_1_2_2_2_1         | 7     | NORM   | SCE, CPE, CPE |          | CPE      | LFE
  510. MODE_6_1               | 11    | NORM   | SCE, CPE      |          | CPE, SCE | LFE
  511. MODE_7_1_BACK          | 12    | NORM   | SCE, CPE      |          | CPE, CPE | LFE
  512. -----------------------+-------+--------+---------------+----------+----------+---------
  513. MODE_7_1_TOP_FRONT     | 14    | NORM   | SCE, CPE      |          | CPE      | LFE
  514.                        |       | TOP    | CPE           |          |          |
  515. -----------------------+-------+--------+---------------+----------+----------+---------
  516. MODE_7_1_REAR_SURROUND | 0     | NORM   | SCE, CPE      |          | CPE, CPE | LFE
  517. MODE_7_1_FRONT_CENTER  | 0     | NORM   | SCE, CPE, CPE |          | CPE      | LFE
  518. ----------------------------------------------------------------------------------------
  522. - NORM: Normal Height Layer. - TOP: Top Height Layer. - BTM: Bottom Height
  523. Layer.
  524. - SCE: Single Channel Element. - CPE: Channel Pair. - LFE: Low Frequency
  525. Element. \endverbatim
  526. The Table describes all fixed Channel Elements for each Channel Mode which are
  527. assigned to a speaker arrangement. The arrangement includes front, side, back
  528. and lfe Audio Channel Elements in the normal height layer, possibly followed by
  529. front, side, and back elements in the top and bottom layer (Channel
  530. Configuration 14). \n This mapping of Audio Channel Elements is defined in MPEG
  531. standard for Channel Config 1-7 and 11,12,14.\n In case of Channel Config 0 or
  532. writing matrix mixdown coefficients, the encoder enables the writing of Program
  533. Config Element itself as described in \ref encPCE. The configuration used in
  534. Program Config Element refers to the denoted Table.\n Beside the Channel Element
  535. assignment the Channel Modes are responsible for audio input data channel
  536. mapping. The Channel Mapping of the audio data depends on the selected
  537. ::AACENC_CHANNELORDER which can be MPEG or WAV like order.\n Following table
  538. describes the complete channel mapping for both Channel Order configurations.
  539. \verbatim
  540. ---------------------------------------------------------------------------------------
  541. ChannelMode            | MPEG-Channelorder             | WAV-Channelorder
  542. -----------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---
  543. MODE_1                 | 0 |   |   |   |   |   |   |   | 0 |   |   |   |   |   |   |
  544. MODE_2                 | 0 | 1 |   |   |   |   |   |   | 0 | 1 |   |   |   |   |   |
  545. MODE_1_2               | 0 | 1 | 2 |   |   |   |   |   | 2 | 0 | 1 |   |   |   |   |
  546. MODE_1_2_1             | 0 | 1 | 2 | 3 |   |   |   |   | 2 | 0 | 1 | 3 |   |   |   |
  547. MODE_1_2_2             | 0 | 1 | 2 | 3 | 4 |   |   |   | 2 | 0 | 1 | 3 | 4 |   |   |
  548. MODE_1_2_2_1           | 0 | 1 | 2 | 3 | 4 | 5 |   |   | 2 | 0 | 1 | 4 | 5 | 3 |   |
  549. MODE_1_2_2_2_1         | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 2 | 6 | 7 | 0 | 1 | 4 | 5 | 3
  550. MODE_6_1               | 0 | 1 | 2 | 3 | 4 | 5 | 6 |   | 2 | 0 | 1 | 4 | 5 | 6 | 3 |
  551. MODE_7_1_BACK          | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 2 | 0 | 1 | 6 | 7 | 4 | 5 | 3
  552. MODE_7_1_TOP_FRONT     | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 2 | 0 | 1 | 4 | 5 | 3 | 6 | 7
  553. -----------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---
  554. MODE_7_1_REAR_SURROUND | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 2 | 0 | 1 | 6 | 7 | 4 | 5 | 3
  555. MODE_7_1_FRONT_CENTER  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 2 | 6 | 7 | 0 | 1 | 4 | 5 | 3
  556. ---------------------------------------------------------------------------------------
  559. \endverbatim
  560. The denoted mapping is important for correct audio channel assignment when using
  561. MPEG or WAV ordering. The incoming audio channels are distributed MPEG like
  562. starting at the front channels and ending at the back channels. The distribution
  563. is used as described in the table concerning Channel Config and fixed channel elements.
  564. Please see the following example for clarification.
  565. \verbatim
  566. Example: MODE_1_2_2_1 - WAV-Channelorder 5.1
  567. ------------------------------------------
  568. Input Channel | Coder Channel
  569. --------------------+---------------------
  570. 2 (front center) | 0 (SCE channel)
  571. 0 (left center) | 1 (1st of 1st CPE)
  572. 1 (right center) | 2 (2nd of 1st CPE)
  573. 4 (left surround) | 3 (1st of 2nd CPE)
  574. 5 (right surround) | 4 (2nd of 2nd CPE)
  575. 3 (LFE) | 5 (LFE)
  576. ------------------------------------------
  577. \endverbatim
  578. \section suppBitrates Supported Bitrates
  579. The FDK AAC Encoder provides a wide range of supported bitrates.
  580. The minimum and maximum allowed bitrate depends on the Audio Object Type. For
  581. AAC-LC the minimum bitrate is the bitrate that is required to write the most
  582. basic and minimal valid bitstream. It consists of the bitstream format header
  583. information and other static/mandatory information within the AAC payload. The
  584. maximum AAC framesize allowed by the MPEG-4 standard determines the maximum
  585. allowed bitrate for AAC-LC. For HE-AAC and HE-AAC v2 a library internal look-up
  586. table is used.
  587. A good working point in terms of audio quality, sampling rate and bitrate, is at
  588. 1 to 1.5 bits/audio sample for AAC-LC, 0.625 bits/audio sample for dualrate
  589. HE-AAC, 1.125 bits/audio sample for downsampled HE-AAC and 0.5 bits/audio sample
  590. for HE-AAC v2. For example for one channel with a sampling frequency of 48 kHz,
  591. the range from 48 kbit/s to 72 kbit/s achieves reasonable audio quality for
  592. AAC-LC.
  593. For HE-AAC and HE-AAC v2 the lowest possible audio input sampling frequency is
  594. 16 kHz because then the AAC-LC core encoder operates in dual rate mode at its
  595. lowest possible sampling frequency, which is 8 kHz. HE-AAC v2 requires stereo
  596. input audio data.
  597. Please note that in HE-AAC or HE-AAC v2 mode the encoder supports much higher
  598. bitrates than are appropriate for HE-AAC or HE-AAC v2. For example, at a bitrate
  599. of more than 64 kbit/s for a stereo audio signal at 44.1 kHz it usually makes
  600. sense to use AAC-LC, which will produce better audio quality at that bitrate
  601. than HE-AAC or HE-AAC v2.
  602. \section reommendedConfig Recommended Sampling Rate and Bitrate Combinations
  603. The following table provides an overview of recommended encoder configuration
  604. parameters which we determined by virtue of numerous listening tests.
  605. \subsection reommendedConfigLC AAC-LC, HE-AAC, HE-AACv2 in Dualrate SBR mode.
  606. \verbatim
  607. -----------------------------------------------------------------------------------
  608. Audio Object Type | Bit Rate Range | Supported | Preferred | No. of
  609. | [bit/s] | Sampling Rates | Sampl. | Chan.
  610. | | [kHz] | Rate |
  611. | | | [kHz] |
  612. -------------------+------------------+-----------------------+------------+-------
  613. AAC LC + SBR + PS | 8000 - 11999 | 22.05, 24.00 | 24.00 | 2
  614. AAC LC + SBR + PS | 12000 - 17999 | 32.00 | 32.00 | 2
  615. AAC LC + SBR + PS | 18000 - 39999 | 32.00, 44.10, 48.00 | 44.10 | 2
  616. AAC LC + SBR + PS | 40000 - 64000 | 32.00, 44.10, 48.00 | 48.00 | 2
  617. -------------------+------------------+-----------------------+------------+-------
  618. AAC LC + SBR | 8000 - 11999 | 22.05, 24.00 | 24.00 | 1
  619. AAC LC + SBR | 12000 - 17999 | 32.00 | 32.00 | 1
  620. AAC LC + SBR | 18000 - 39999 | 32.00, 44.10, 48.00 | 44.10 | 1
  621. AAC LC + SBR | 40000 - 64000 | 32.00, 44.10, 48.00 | 48.00 | 1
  622. -------------------+------------------+-----------------------+------------+-------
  623. AAC LC + SBR | 16000 - 27999 | 32.00, 44.10, 48.00 | 32.00 | 2
  624. AAC LC + SBR | 28000 - 63999 | 32.00, 44.10, 48.00 | 44.10 | 2
  625. AAC LC + SBR | 64000 - 128000 | 32.00, 44.10, 48.00 | 48.00 | 2
  626. -------------------+------------------+-----------------------+------------+-------
  627. AAC LC + SBR | 64000 - 69999 | 32.00, 44.10, 48.00 | 32.00 | 5, 5.1
  628. AAC LC + SBR | 70000 - 239999 | 32.00, 44.10, 48.00 | 44.10 | 5, 5.1
  629. AAC LC + SBR | 240000 - 319999 | 32.00, 44.10, 48.00 | 48.00 | 5, 5.1
  631. -------------------+------------------+-----------------------+------------+-------
  632. AAC LC | 8000 - 15999 | 11.025, 12.00, 16.00 | 12.00 | 1
  633. AAC LC | 16000 - 23999 | 16.00 | 16.00 | 1
  634. AAC LC | 24000 - 31999 | 16.00, 22.05, 24.00 | 24.00 | 1
  635. AAC LC | 32000 - 55999 | 32.00 | 32.00 | 1
  636. AAC LC | 56000 - 160000 | 32.00, 44.10, 48.00 | 44.10 | 1
  637. AAC LC | 160001 - 288000 | 48.00 | 48.00 | 1
  638. -------------------+------------------+-----------------------+------------+-------
  639. AAC LC | 16000 - 23999 | 11.025, 12.00, 16.00 | 12.00 | 2
  640. AAC LC | 24000 - 31999 | 16.00 | 16.00 | 2
  641. AAC LC | 32000 - 39999 | 16.00, 22.05, 24.00 | 22.05 | 2
  642. AAC LC | 40000 - 95999 | 32.00 | 32.00 | 2
  643. AAC LC | 96000 - 111999 | 32.00, 44.10, 48.00 | 32.00 | 2
  644. AAC LC | 112000 - 320001 | 32.00, 44.10, 48.00 | 44.10 | 2
  645. AAC LC | 320002 - 576000 | 48.00 | 48.00 | 2
  646. -------------------+------------------+-----------------------+------------+-------
  647. AAC LC | 160000 - 239999 | 32.00 | 32.00 | 5, 5.1
  648. AAC LC | 240000 - 279999 | 32.00, 44.10, 48.00 | 32.00 | 5, 5.1
  649. AAC LC | 280000 - 800000 | 32.00, 44.10, 48.00 | 44.10 | 5, 5.1
  651. -----------------------------------------------------------------------------------
  652. \endverbatim \n
  653. \subsection reommendedConfigLD AAC-LD, AAC-ELD, AAC-ELD with SBR in Dualrate SBR mode.
  654. Unlike the HE-AAC configuration, SBR is not covered by the ELD audio object
  655. type and needs to be enabled explicitly. Use ::AACENC_SBR_MODE to configure SBR
  656. and its sampling rate ratio with the ::AACENC_SBR_RATIO parameter. \verbatim
  657. -----------------------------------------------------------------------------------
  658. Audio Object Type | Bit Rate Range | Supported | Preferred | No. of
  659. | [bit/s] | Sampling Rates | Sampl. | Chan.
  660. | | [kHz] | Rate |
  661. | | | [kHz] |
  662. -------------------+------------------+-----------------------+------------+-------
  663. ELD + SBR | 18000 - 24999 | 32.00 - 44.10 | 32.00 | 1
  664. ELD + SBR | 25000 - 31999 | 32.00 - 48.00 | 32.00 | 1
  665. ELD + SBR | 32000 - 64000 | 32.00 - 48.00 | 48.00 | 1
  666. -------------------+------------------+-----------------------+------------+-------
  667. ELD + SBR | 32000 - 51999 | 32.00 - 48.00 | 44.10 | 2
  668. ELD + SBR | 52000 - 128000 | 32.00 - 48.00 | 48.00 | 2
  669. -------------------+------------------+-----------------------+------------+-------
  670. ELD + SBR | 78000 - 160000 | 32.00 - 48.00 | 48.00 | 3
  671. -------------------+------------------+-----------------------+------------+-------
  672. ELD + SBR | 104000 - 212000 | 32.00 - 48.00 | 48.00 | 4
  673. -------------------+------------------+-----------------------+------------+-------
  674. ELD + SBR | 130000 - 246000 | 32.00 - 48.00 | 48.00 | 5, 5.1
  676. -------------------+------------------+-----------------------+------------+-------
  677. LD, ELD | 16000 - 19999 | 16.00 - 24.00 | 16.00 | 1
  678. LD, ELD | 20000 - 39999 | 16.00 - 32.00 | 24.00 | 1
  679. LD, ELD | 40000 - 49999 | 22.05 - 32.00 | 32.00 | 1
  680. LD, ELD | 50000 - 61999 | 24.00 - 44.10 | 32.00 | 1
  681. LD, ELD | 62000 - 84999 | 32.00 - 48.00 | 44.10 | 1
  682. LD, ELD | 85000 - 192000 | 44.10 - 48.00 | 48.00 | 1
  683. -------------------+------------------+-----------------------+------------+-------
  684. LD, ELD | 64000 - 75999 | 24.00 - 32.00 | 32.00 | 2
  685. LD, ELD | 76000 - 97999 | 24.00 - 44.10 | 32.00 | 2
  686. LD, ELD | 98000 - 135999 | 32.00 - 48.00 | 44.10 | 2
  687. LD, ELD | 136000 - 384000 | 44.10 - 48.00 | 48.00 | 2
  688. -------------------+------------------+-----------------------+------------+-------
  689. LD, ELD | 96000 - 113999 | 24.00 - 32.00 | 32.00 | 3
  690. LD, ELD | 114000 - 146999 | 24.00 - 44.10 | 32.00 | 3
  691. LD, ELD | 147000 - 203999 | 32.00 - 48.00 | 44.10 | 3
  692. LD, ELD | 204000 - 576000 | 44.10 - 48.00 | 48.00 | 3
  693. -------------------+------------------+-----------------------+------------+-------
  694. LD, ELD | 128000 - 151999 | 24.00 - 32.00 | 32.00 | 4
  695. LD, ELD | 152000 - 195999 | 24.00 - 44.10 | 32.00 | 4
  696. LD, ELD | 196000 - 271999 | 32.00 - 48.00 | 44.10 | 4
  697. LD, ELD | 272000 - 768000 | 44.10 - 48.00 | 48.00 | 4
  698. -------------------+------------------+-----------------------+------------+-------
  699. LD, ELD | 160000 - 189999 | 24.00 - 32.00 | 32.00 | 5, 5.1
  700. LD, ELD | 190000 - 244999 | 24.00 - 44.10 | 32.00 | 5, 5.1
  701. LD, ELD | 245000 - 339999 | 32.00 - 48.00 | 44.10 | 5, 5.1
  702. LD, ELD | 340000 - 960000 | 44.10 - 48.00 | 48.00 | 5, 5.1
  704. -----------------------------------------------------------------------------------
  705. \endverbatim \n
  706. \subsection reommendedConfigELD AAC-ELD with SBR in Downsampled SBR mode.
  707. \verbatim
  708. -----------------------------------------------------------------------------------
  709. Audio Object Type | Bit Rate Range | Supported | Preferred | No. of
  710. | [bit/s] | Sampling Rates | Sampl. | Chan.
  711. | | [kHz] | Rate |
  712. | | | [kHz] |
  713. -------------------+------------------+-----------------------+------------+-------
  714. ELD + SBR | 18000 - 24999 | 16.00 - 22.05 | 22.05 | 1
  715. (downsampled SBR) | 25000 - 31999 | 16.00 - 24.00 | 24.00 | 1
  716. | 32000 - 47999 | 22.05 - 32.00 | 32.00 | 1
  717. | 48000 - 64000 | 22.05 - 48.00 | 32.00 | 1
  718. -------------------+------------------+-----------------------+------------+-------
  719. ELD + SBR | 32000 - 51999 | 16.00 - 24.00 | 24.00 | 2
  720. (downsampled SBR) | 52000 - 59999 | 22.05 - 24.00 | 24.00 | 2
  721. | 60000 - 95999 | 22.05 - 32.00 | 32.00 | 2
  722. | 96000 - 128000 | 22.05 - 48.00 | 32.00 | 2
  723. -------------------+------------------+-----------------------+------------+-------
  724. ELD + SBR | 78000 - 99999 | 22.05 - 24.00 | 24.00 | 3
  725. (downsampled SBR) | 100000 - 143999 | 22.05 - 32.00 | 32.00 | 3
  726. | 144000 - 159999 | 22.05 - 48.00 | 32.00 | 3
  727. | 160000 - 192000 | 32.00 - 48.00 | 32.00 | 3
  728. -------------------+------------------+-----------------------+------------+-------
  729. ELD + SBR | 104000 - 149999 | 22.05 - 24.00 | 24.00 | 4
  730. (downsampled SBR) | 150000 - 191999 | 22.05 - 32.00 | 32.00 | 4
  731. | 192000 - 211999 | 22.05 - 48.00 | 32.00 | 4
  732. | 212000 - 256000 | 32.00 - 48.00 | 32.00 | 4
  733. -------------------+------------------+-----------------------+------------+-------
  734. ELD + SBR | 130000 - 171999 | 22.05 - 24.00 | 24.00 | 5, 5.1
  735. (downsampled SBR) | 172000 - 239999 | 22.05 - 32.00 | 32.00 | 5, 5.1
  736. | 240000 - 320000 | 32.00 - 48.00 | 32.00 | 5, 5.1
  737. -----------------------------------------------------------------------------------
  738. \endverbatim \n
  739. \subsection reommendedConfigELDv2 AAC-ELD v2, AAC-ELD v2 with SBR.
  740. The ELD v2 212 configuration must be configured explicitly by setting the
  741. ::AACENC_CHANNELMODE parameter to the MODE_212 value. SBR can be configured
  742. separately through the ::AACENC_SBR_MODE and ::AACENC_SBR_RATIO parameters. The following
  743. configurations shall apply to both framelengths 480 and 512. For ELD v2
  744. configuration without SBR and framelength 480 the supported sampling rate is
  745. restricted to the range from 16 kHz up to 24 kHz. \verbatim
  746. -----------------------------------------------------------------------------------
  747. Audio Object Type | Bit Rate Range | Supported | Preferred | No. of
  748. | [bit/s] | Sampling Rates | Sampl. | Chan.
  749. | | [kHz] | Rate |
  750. | | | [kHz] |
  751. -------------------+------------------+-----------------------+------------+-------
  752. ELD-212 | 16000 - 19999 | 16.00 - 24.00 | 16.00 | 2
  753. (without SBR) | 20000 - 39999 | 16.00 - 32.00 | 24.00 | 2
  754. | 40000 - 49999 | 22.05 - 32.00 | 32.00 | 2
  755. | 50000 - 61999 | 24.00 - 44.10 | 32.00 | 2
  756. | 62000 - 84999 | 32.00 - 48.00 | 44.10 | 2
  757. | 85000 - 192000 | 44.10 - 48.00 | 48.00 | 2
  758. -------------------+------------------+-----------------------+------------+-------
  759. ELD-212 + SBR | 18000 - 20999 | 32.00 | 32.00 | 2
  760. (dualrate SBR) | 21000 - 25999 | 32.00 - 44.10 | 32.00 | 2
  761. | 26000 - 31999 | 32.00 - 48.00 | 44.10 | 2
  762. | 32000 - 64000 | 32.00 - 48.00 | 48.00 | 2
  763. -------------------+------------------+-----------------------+------------+-------
  764. ELD-212 + SBR | 18000 - 19999 | 16.00 - 22.05 | 22.05 | 2
  765. (downsampled SBR) | 20000 - 24999 | 16.00 - 24.00 | 22.05 | 2
  766. | 25000 - 31999 | 16.00 - 24.00 | 24.00 | 2
  767. | 32000 - 64000 | 24.00 - 24.00 | 24.00 | 2
  768. -------------------+------------------+-----------------------+------------+-------
  769. \endverbatim \n
  770. \page ENCODERBEHAVIOUR Encoder Behaviour
  771. \section BEHAVIOUR_BANDWIDTH Bandwidth
  772. The FDK AAC encoder usually does not use the full frequency range of the input
  773. signal, but restricts the bandwidth according to certain library-internal
  774. settings. They can be changed in the table "bandWidthTable" in the file
  775. bandwidth.cpp (if available).
  776. The encoder API provides the ::AACENC_BANDWIDTH parameter to adjust the
  777. bandwidth explicitly. \code aacEncoder_SetParam(hAacEncoder, AACENC_BANDWIDTH,
  778. value); \endcode
  779. However it is not recommended to change these settings, because they are based
  780. on numerous listening tests and careful tweaks to ensure the best overall
  781. encoding quality. Also, the maximum bandwidth that can be set manually by the
  782. user is 20kHz or fs/2, whichever value is smaller.
  783. Theoretically a signal of for example 48 kHz can contain frequencies up to 24
  784. kHz, but to use this full range in an audio encoder usually does not make sense.
  785. Usually the encoder has a very limited amount of bits to spend (typically 128
  786. kbit/s for stereo 48 kHz content) and to allow full range bandwidth would waste
  787. a lot of these bits for frequencies the human ear is hardly able to perceive
  788. anyway, if at all. Hence it is wise to use the available bits for the really
  789. important frequency range and just skip the rest. At lower bitrates (e. g. <= 80
  790. kbit/s for stereo 48 kHz content) the encoder will choose an even smaller
  791. bandwidth, because an encoded signal with smaller bandwidth and hence less
  792. artifacts sounds better than a signal with higher bandwidth but then more coding
  793. artefacts across all frequencies. These artefacts would occur if small bitrates
  794. and high bandwidths are chosen because the available bits are just not enough to
  795. encode all frequencies well.
  796. Unfortunately some people evaluate encoding quality based on possible bandwidth
  797. as well, but it is a double-edged sword considering the trade-off described
  798. above.
  799. Another aspect is workload consumption. The higher the allowed bandwidth, the
  800. more frequency lines have to be processed, which in turn increases the workload.
  801. \section FRAMESIZES_AND_BIT_RESERVOIR Frame Sizes & Bit Reservoir
  802. For AAC there is a difference between constant bit rate and constant frame
  803. length due to the so-called bit reservoir technique, which allows the encoder to
  804. use less bits in an AAC frame for those audio signal sections which are easy to
  805. encode, and then spend them at a later point in time for more complex audio
  806. sections. The extent to which this "bit exchange" is done is limited to allow
  807. for reliable and relatively low delay real time streaming. Therefore, for
  808. AAC-ELD, the bitreservoir is limited. It varies between 500 and 4000 bits/frame,
  809. depending on the bitrate/channel.
  810. - For a bitrate of 12kbps/channel and below, the AAC-ELD bitreservoir is 500
  811. bits/frame.
  812. - For a bitrate of 70kbps/channel and above, the AAC-ELD bitreservoir is 4000
  813. bits/frame.
  814. - Between 12kbps/channel and 70kbps/channel, the AAC-ELD bitreservoir is increased
  815. linearly.
  816. - For AAC-LC, the bitrate is only limited by the maximum AAC frame length. It
  817. is, regardless of the available bit reservoir, defined as 6144 bits per channel.
  818. Over a longer period in time the bitrate will be constant in the AAC constant
  819. bitrate mode, e.g. for ISDN transmission. This means that in AAC each bitstream
  820. frame will in general have a different length in bytes but over time it
  821. will reach the target bitrate.
  822. One could also make an MPEG compliant
  823. AAC encoder which always produces constant length packages for each AAC frame,
  824. but the audio quality would be considerably worse since the bit reservoir
  825. technique would have to be switched off completely. A higher bit rate would have
  826. to be used to get the same audio quality as with an enabled bit reservoir.
  827. For mp3 by the way, the same bit reservoir technique exists, but there each bit
  828. stream frame has a constant length for a given bit rate (ignoring the
  829. padding byte). In mp3 there is a so-called "back pointer" which tells
  830. the decoder which bits belong to the current mp3 frame - and in general some or
  831. many bits have been transmitted in an earlier mp3 frame. Basically this leads to
  832. the same "bit exchange between mp3 frames" as in AAC but with virtually constant
  833. length frames.
  834. This variable frame length at "constant bit rate" is not something special
  835. in this Fraunhofer IIS AAC encoder. AAC has been designed in that way.
  836. \subsection BEHAVIOUR_ESTIM_AVG_FRAMESIZES Estimating Average Frame Sizes
  837. A HE-AAC v1 or v2 audio frame contains 2048 PCM samples per channel.
  838. The number of HE-AAC frames \f$N\_FRAMES\f$ per second at 44.1 kHz is:
  839. \f[
  840. N\_FRAMES = 44100 / 2048 = 21.5332
  841. \f]
  842. At a bit rate of 8 kbps the average number of bits per frame
  843. \f$N\_BITS\_PER\_FRAME\f$ is:
  844. \f[
  845. N\_BITS\_PER\_FRAME = 8000 / 21.5332 = 371.52
  846. \f]
  847. which is about 46.44 bytes per encoded frame.
  848. At a bit rate of 32 kbps, which is quite high for single channel HE-AAC v1, it
  849. is:
  850. \f[
  851. N\_BITS\_PER\_FRAME = 32000 / 21.5332 = 1486
  852. \f]
  853. which is about 185.76 bytes per encoded frame.
  854. These bits/frame figures are average figures where each AAC frame generally has
  855. a different size in bytes. To calculate the same for AAC-LC just use 1024
  856. instead of 2048 PCM samples per frame and channel. For AAC-LD/ELD it is either
  857. 480 or 512 PCM samples per frame and channel.
  858. \section BEHAVIOUR_TOOLS Encoder Tools
  859. The AAC encoder supports TNS, PNS, MS, Intensity and activates these tools
  860. depending on the audio signal and the encoder configuration (i.e. bitrate or
  861. AOT). It is not required to configure these tools manually.
  862. PNS improves encoding quality only for certain bitrates. Therefore it makes
  863. sense to activate PNS only for these bitrates and save the processing power
  864. required for PNS (about 10 % of the encoder) when using other bitrates. This is
  865. done automatically inside the encoder library. PNS is disabled inside the
  866. encoder library if an MPEG-2 AOT is chosen since PNS is an MPEG-4 AAC feature.
  867. If SBR is activated, the encoder automatically deactivates PNS internally. If
  868. TNS is disabled but PNS is allowed, the encoder deactivates PNS calculation
  869. internally.
  870. */
  871. #ifndef AACENC_LIB_H
  872. #define AACENC_LIB_H
  873. #include "machine_type.h"
  874. #include "FDK_audio.h"
  875. #define AACENCODER_LIB_VL0 4 /*!< Encoder library version, level 0 (presumably the major number - confirm). */
  876. #define AACENCODER_LIB_VL1 0 /*!< Encoder library version, level 1 (presumably the minor number - confirm). */
  877. #define AACENCODER_LIB_VL2 1 /*!< Encoder library version, level 2 (presumably the patch/build number - confirm). */
  878. /**
  879. * AAC encoder error codes. AACENC_OK is zero; the non-zero codes are grouped by
  880. * value: 0x002x handle/memory/parameter/configuration, 0x004x initialization, 0x0060 encoding, 0x0080 end of file. */
  881. typedef enum {
  882. AACENC_OK = 0x0000, /*!< Success: no error occurred. */
  883. AACENC_INVALID_HANDLE =
  884. 0x0020, /*!< Handle passed to function call was invalid. */
  885. AACENC_MEMORY_ERROR = 0x0021, /*!< Memory allocation failed. */
  886. AACENC_UNSUPPORTED_PARAMETER = 0x0022, /*!< Parameter not available. */
  887. AACENC_INVALID_CONFIG = 0x0023, /*!< Configuration not provided. */
  888. AACENC_INIT_ERROR = 0x0040, /*!< General initialization error. */
  889. AACENC_INIT_AAC_ERROR = 0x0041, /*!< AAC library initialization error. */
  890. AACENC_INIT_SBR_ERROR = 0x0042, /*!< SBR library initialization error. */
  891. AACENC_INIT_TP_ERROR = 0x0043, /*!< Transport library initialization error. */
  892. AACENC_INIT_META_ERROR =
  893. 0x0044, /*!< Meta data library initialization error. */
  894. AACENC_INIT_MPS_ERROR = 0x0045, /*!< MPS library initialization error. */
  895. AACENC_ENCODE_ERROR = 0x0060, /*!< The encoding process was interrupted by an
  896. unexpected error. */
  897. AACENC_ENCODE_EOF = 0x0080 /*!< End of file reached. */
  898. } AACENC_ERROR;
  899. /**
  900. * AAC encoder buffer descriptor identifiers.
  901. * These identifiers are stored in the buffer descriptor field
  902. * AACENC_BufDesc::bufferIdentifiers to tell what each buffer holds.
  903. */
  904. typedef enum {
  905. /* Input buffer identifiers (values 0-2). */
  906. IN_AUDIO_DATA = 0, /*!< Audio input buffer, interleaved INT_PCM samples. */
  907. IN_ANCILLRY_DATA = 1, /*!< Ancillary data to be embedded into the bitstream. */
  908. IN_METADATA_SETUP = 2, /*!< Setup structure for embedding meta data. */
  909. /* Output buffer identifiers (values 3-4). */
  910. OUT_BITSTREAM_DATA = 3, /*!< Buffer holds bitstream output data. */
  911. OUT_AU_SIZES =
  912. 4 /*!< Buffer contains the size of each access unit. This information
  913. is necessary for superframing. */
  914. } AACENC_BufferIdentifier;
  915. /**
  916. * AAC encoder handle: a pointer to struct AACENCODER, which is not defined in
  917. * this part of the header (presumably opaque to API users - confirm). */
  918. typedef struct AACENCODER *HANDLE_AACENCODER;
  919. /**
  920. * Provides information about the current encoder configuration.
  921. */
  922. typedef struct {
  923. UINT maxOutBufBytes; /*!< Maximum number of encoder bitstream bytes within one
  924. frame. Size depends on maximum number of supported
  925. channels in encoder instance. */
  926. UINT maxAncBytes; /*!< Maximum number of ancillary data bytes which can be
  927. inserted into bitstream within one frame. */
  928. UINT inBufFillLevel; /*!< Internal input buffer fill level in samples per
  929. channel. This parameter will automatically be cleared
  930. if the sampling rate or the channel mode/order changes. */
  931. UINT inputChannels; /*!< Number of input channels expected in encoding
  932. process. */
  933. UINT frameLength; /*!< Amount of input audio samples consumed each frame per
  934. channel, depending on audio object type configuration. */
  935. UINT nDelay; /*!< Codec delay in PCM samples/channel. Depends on framelength
  936. and AOT. Does not include framing delay for filling up encoder
  937. PCM input buffer. */
  938. UINT nDelayCore; /*!< Codec delay in PCM samples/channel, without the delay caused by
  939. the decoder SBR module. This delay is needed to correctly
  940. write edit lists for gapless playback. The decoder may not
  941. know how much delay is introduced by SBR, since it may not
  942. know if SBR is active at all (implicit signaling),
  943. therefore the decoder must take into account any delay
  944. caused by the SBR module. */
  945. UCHAR confBuf[64]; /*!< Configuration buffer in binary format as an
  946. AudioSpecificConfig or StreamMuxConfig according to the
  947. selected transport type. */
  948. UINT confSize; /*!< Number of valid bytes in confBuf (which holds 64 bytes). */
  949. } AACENC_InfoStruct;
  950. /**
  951. * Describes the input and output buffers for an aacEncEncode() call.
  952. */
  953. typedef struct {
  954. INT numBufs; /*!< Number of buffers (presumably the entry count of each vector below - TODO confirm). */
  955. void **bufs; /*!< Pointer to vector containing the buffer addresses. */
  956. INT *bufferIdentifiers; /*!< Identifier of each buffer element. See
  957. ::AACENC_BufferIdentifier. */
  958. INT *bufSizes; /*!< Size of each buffer in 8-bit bytes. */
  959. INT *bufElSizes; /*!< Size of each buffer element in bytes. */
  960. } AACENC_BufDesc;
  961. /**
  962. * Defines the input arguments for an aacEncEncode() call.
  963. */
  964. typedef struct {
  965. INT numInSamples; /*!< Number of valid input audio samples (multiple of input
  966. channels). See aacEncEncode() for the meaning of the values 0 and -1. */
  967. INT numAncBytes; /*!< Number of ancillary data bytes to be encoded. */
  968. } AACENC_InArgs;
  969. /**
  970. * Defines the output arguments for an aacEncEncode() call.
  971. */
  972. typedef struct {
  973. INT numOutBytes; /*!< Number of valid bitstream bytes generated during
  974. aacEncEncode(). While flushing, no valid bytes means the delay lines are fully flushed. */
  975. INT numInSamples; /*!< Number of input audio samples consumed by the encoder.
  976. */
  977. INT numAncBytes; /*!< Number of ancillary data bytes consumed by the encoder.
  978. */
  979. INT bitResState; /*!< State of the bit reservoir in bits. */
  980. } AACENC_OutArgs;
  981. /**
  982. * Meta data compression profiles, used by drc_profile and comp_profile in ::AACENC_MetaData.
  983. */
  984. typedef enum {
  985. AACENC_METADATA_DRC_NONE = 0, /*!< None. */
  986. AACENC_METADATA_DRC_FILMSTANDARD = 1, /*!< Film standard. */
  987. AACENC_METADATA_DRC_FILMLIGHT = 2, /*!< Film light. */
  988. AACENC_METADATA_DRC_MUSICSTANDARD = 3, /*!< Music standard. */
  989. AACENC_METADATA_DRC_MUSICLIGHT = 4, /*!< Music light. */
  990. AACENC_METADATA_DRC_SPEECH = 5, /*!< Speech. */
  991. AACENC_METADATA_DRC_NOT_PRESENT =
  992. 256 /*!< Disable writing gain factor (used for comp_profile only). */
  993. } AACENC_METADATA_DRC_PROFILE;
  994. /**
  995. * Meta data setup structure. See also ::IN_METADATA_SETUP and ::AACENC_METADATA_MODE.
  996. */
  997. typedef struct {
  998. AACENC_METADATA_DRC_PROFILE
  999. drc_profile; /*!< MPEG DRC compression profile. See
  1000. ::AACENC_METADATA_DRC_PROFILE. */
  1001. AACENC_METADATA_DRC_PROFILE
  1002. comp_profile; /*!< ETSI heavy compression profile. See
  1003. ::AACENC_METADATA_DRC_PROFILE. */
  1004. INT drc_TargetRefLevel; /*!< Used to define expected level to:
  1005. Scaled with 16 bit. x*2^16. */
  1006. INT comp_TargetRefLevel; /*!< Adjust limiter to avoid overload.
  1007. Scaled with 16 bit. x*2^16. */
  1008. INT prog_ref_level_present; /*!< Flag, if prog_ref_level is present */
  1009. INT prog_ref_level; /*!< Programme Reference Level = Dialogue Level:
  1010. -31.75dB .. 0 dB ; stepsize: 0.25dB
  1011. Scaled with 16 bit. x*2^16.*/
  1012. UCHAR PCE_mixdown_idx_present; /*!< Flag, if dmx-idx should be written in
  1013. programme config element */
  1014. UCHAR ETSI_DmxLvl_present; /*!< Flag, if dmx-lvl should be written in
  1015. ETSI-ancData */
  1016. SCHAR centerMixLevel; /*!< Center downmix level (0...7, according to table) */
  1017. SCHAR surroundMixLevel; /*!< Surround downmix level (0...7, according to
  1018. table) */
  1019. UCHAR
  1020. dolbySurroundMode; /*!< Indication for Dolby Surround Encoding Mode.
  1021. - 0: Dolby Surround mode not indicated
  1022. - 1: 2-ch audio part is not Dolby surround encoded
  1023. - 2: 2-ch audio part is Dolby surround encoded */
  1024. UCHAR drcPresentationMode; /*!< Indication for DRC Presentation Mode.
  1025. - 0: Presentation mode not indicated
  1026. - 1: Presentation mode 1
  1027. - 2: Presentation mode 2 */
  1028. struct {
  1029. /* extended ancillary data */
  1030. UCHAR extAncDataEnable; /*!< Indicates if MPEG4_ext_ancillary_data() exists.
  1031. - 0: No MPEG4_ext_ancillary_data().
  1032. - 1: Insert MPEG4_ext_ancillary_data(). */
  1033. UCHAR
  1034. extDownmixLevelEnable; /*!< Indicates if ext_downmixing_levels() exists.
  1035. - 0: No ext_downmixing_levels().
  1036. - 1: Insert ext_downmixing_levels(). */
  1037. UCHAR extDownmixLevel_A; /*!< Downmix level index A (0...7, according to
  1038. table) */
  1039. UCHAR extDownmixLevel_B; /*!< Downmix level index B (0...7, according to
  1040. table) */
  1041. UCHAR dmxGainEnable; /*!< Indicates if ext_downmixing_global_gains() exists.
  1042. - 0: No ext_downmixing_global_gains().
  1043. - 1: Insert ext_downmixing_global_gains(). */
  1044. INT dmxGain5; /*!< Gain factor for downmix to 5 channels.
  1045. -15.75dB .. -15.75dB (NOTE(review): upper bound presumably +15.75dB - confirm); stepsize: 0.25dB
  1046. Scaled with 16 bit. x*2^16.*/
  1047. INT dmxGain2; /*!< Gain factor for downmix to 2 channels.
  1048. -15.75dB .. -15.75dB (NOTE(review): upper bound presumably +15.75dB - confirm); stepsize: 0.25dB
  1049. Scaled with 16 bit. x*2^16.*/
  1050. UCHAR lfeDmxEnable; /*!< Indicates if ext_downmixing_lfe_level() exists.
  1051. - 0: No ext_downmixing_lfe_level().
  1052. - 1: Insert ext_downmixing_lfe_level(). */
  1053. UCHAR lfeDmxLevel; /*!< Downmix level index for LFE (0..15, according to
  1054. table) */
  1055. } ExtMetaData;
  1056. } AACENC_MetaData;
  1057. /**
  1058. * AAC encoder control flags.
  1059. *
  1060. * In interaction with the ::AACENC_CONTROL_STATE parameter it is possible to
  1061. * get information about the internal initialization process. It is also
  1062. * possible to overwrite the internal state from outside when necessary.
  1063. */
  1064. typedef enum {
  1065. AACENC_INIT_NONE = 0x0000, /*!< Do not trigger initialization. */
  1066. AACENC_INIT_CONFIG =
  1067. 0x0001, /*!< Initialize all encoder modules configuration. */
  1068. AACENC_INIT_STATES = 0x0002, /*!< Reset all encoder modules history buffer. */
  1069. AACENC_INIT_TRANSPORT =
  1070. 0x1000, /*!< Initialize transport lib with new parameters. */
  1071. AACENC_RESET_INBUFFER =
  1072. 0x2000, /*!< Reset fill level of internal input buffer. */
  1073. AACENC_INIT_ALL = 0xFFFF /*!< Initialize all. */
  1074. } AACENC_CTRLFLAGS;
  1075. /**
  1076. * \brief AAC encoder setting parameters.
  1077. *
  1078. * Use aacEncoder_SetParam() function to configure, or use aacEncoder_GetParam()
  1079. * function to read the internal status of the following parameters.
  1080. */
  1081. typedef enum {
  1082. AACENC_AOT =
  1083. 0x0100, /*!< Audio object type. See ::AUDIO_OBJECT_TYPE in FDK_audio.h.
  1084. - 2: MPEG-4 AAC Low Complexity.
  1085. - 5: MPEG-4 AAC Low Complexity with Spectral Band Replication
  1086. (HE-AAC).
  1087. - 29: MPEG-4 AAC Low Complexity with Spectral Band
  1088. Replication and Parametric Stereo (HE-AAC v2). This
  1089. configuration can be used only with stereo input audio data.
  1090. - 23: MPEG-4 AAC Low-Delay.
  1091. - 39: MPEG-4 AAC Enhanced Low-Delay. Since there is no
  1092. ::AUDIO_OBJECT_TYPE for ELD in combination with SBR defined,
  1093. enable SBR explicitly by ::AACENC_SBR_MODE parameter. The ELD
  1094. v2 212 configuration can be configured by ::AACENC_CHANNELMODE
  1095. parameter.
  1096. - 129: MPEG-2 AAC Low Complexity.
  1097. - 132: MPEG-2 AAC Low Complexity with Spectral Band
  1098. Replication (HE-AAC).
  1099. Please note that the virtual MPEG-2 AOTs basically disable the
  1100. non-existing Perceptual Noise Substitution tool in AAC encoder
  1101. and control the MPEG_ID flag in adts header. The virtual
  1102. MPEG-2 AOT doesn't prohibit specific transport formats. */
  1103. AACENC_BITRATE = 0x0101, /*!< Total encoder bitrate. This parameter is
  1104. mandatory and interacts with ::AACENC_BITRATEMODE.
  1105. - CBR: Bitrate in bits/second.
  1106. - VBR: Variable bitrate. Bitrate argument will
  1107. be ignored. See \ref suppBitrates for details. */
  1108. AACENC_BITRATEMODE = 0x0102, /*!< Bitrate mode. One of the following
  1109. kinds of bitrate configuration:
  1110. - 0: Constant bitrate, use bitrate according
  1111. to ::AACENC_BITRATE. (default) Within non-
  1112. LD/ELD ::AUDIO_OBJECT_TYPE, the CBR mode makes
  1113. use of full allowed bitreservoir. In contrast,
  1114. at Low-Delay ::AUDIO_OBJECT_TYPE the
  1115. bitreservoir is kept very small.
  1116. - 1: Variable bitrate mode, \ref vbrmode
  1117. "very low bitrate".
  1118. - 2: Variable bitrate mode, \ref vbrmode
  1119. "low bitrate".
  1120. - 3: Variable bitrate mode, \ref vbrmode
  1121. "medium bitrate".
  1122. - 4: Variable bitrate mode, \ref vbrmode
  1123. "high bitrate".
  1124. - 5: Variable bitrate mode, \ref vbrmode
  1125. "very high bitrate". */
  1126. AACENC_SAMPLERATE = 0x0103, /*!< Audio input data sampling rate. Encoder
  1127. supports following sampling rates: 8000, 11025,
  1128. 12000, 16000, 22050, 24000, 32000, 44100,
  1129. 48000, 64000, 88200, 96000 */
  1130. AACENC_SBR_MODE = 0x0104, /*!< Configure SBR independently of the chosen Audio
  1131. Object Type ::AUDIO_OBJECT_TYPE. This parameter
  1132. is for ELD audio object type only.
  1133. - -1: Use ELD SBR auto configurator (default).
  1134. - 0: Disable Spectral Band Replication.
  1135. - 1: Enable Spectral Band Replication. */
  1136. AACENC_GRANULE_LENGTH =
  1137. 0x0105, /*!< Core encoder (AAC) audio frame length in samples:
  1138. - 1024: Default configuration.
  1139. - 512: Default length in LD/ELD configuration.
  1140. - 480: Length in LD/ELD configuration.
  1141. - 256: Length for ELD reduced delay mode (x2).
  1142. - 240: Length for ELD reduced delay mode (x2).
  1143. - 128: Length for ELD reduced delay mode (x4).
  1144. - 120: Length for ELD reduced delay mode (x4). */
  1145. AACENC_CHANNELMODE = 0x0106, /*!< Set explicit channel mode. Channel mode must
  1146. match with number of input channels.
  1147. - 1-7, 11,12,14 and 33,34: MPEG channel
  1148. modes supported, see ::CHANNEL_MODE in
  1149. FDK_audio.h. */
  1150. AACENC_CHANNELORDER =
  1151. 0x0107, /*!< Input audio data channel ordering scheme:
  1152. - 0: MPEG channel ordering (e. g. 5.1: C, L, R, SL, SR, LFE).
  1153. (default)
  1154. - 1: WAVE file format channel ordering (e. g. 5.1: L, R, C,
  1155. LFE, SL, SR). */
  1156. AACENC_SBR_RATIO =
  1157. 0x0108, /*!< Controls activation of downsampled SBR. With downsampled
  1158. SBR, the delay will be shorter. On the other hand, for
  1159. achieving the same quality level, downsampled SBR needs more
  1160. bits than dual-rate SBR. With downsampled SBR, the AAC encoder
  1161. will work at the same sampling rate as the SBR encoder (single
  1162. rate). Downsampled SBR is supported for AAC-ELD and HE-AACv1.
  1163. - 1: Downsampled SBR (default for ELD).
  1164. - 2: Dual-rate SBR (default for HE-AAC). */
  1165. AACENC_AFTERBURNER =
  1166. 0x0200, /*!< This parameter controls the use of the afterburner feature.
  1167. The afterburner is a type of analysis by synthesis algorithm
  1168. which increases the audio quality but also the required
  1169. processing power. It is recommended to always activate this if
  1170. additional memory consumption and processing power consumption
  1171. is not a problem. If increased MHz and memory consumption are
  1172. an issue then the MHz and memory cost of this optional module
  1173. need to be evaluated against the improvement in audio quality
  1174. on a case by case basis.
  1175. - 0: Disable afterburner (default).
  1176. - 1: Enable afterburner. */
  1177. AACENC_BANDWIDTH = 0x0203, /*!< Core encoder audio bandwidth:
  1178. - 0: Determine audio bandwidth internally
  1179. (default, see chapter \ref BEHAVIOUR_BANDWIDTH).
  1180. - 1 to fs/2: Audio bandwidth in Hertz. Limited
  1181. to 20kHz max. Not usable if SBR is active. This
  1182. setting is for experts only, better do not touch
  1183. this value to avoid degraded audio quality. */
  1184. AACENC_PEAK_BITRATE =
  1185. 0x0207, /*!< Peak bitrate configuration parameter to adjust maximum bits
  1186. per audio frame. Bitrate is in bits/second. The peak bitrate
  1187. will internally be limited to the chosen bitrate
  1188. ::AACENC_BITRATE as lower limit and the
  1189. number_of_effective_channels*6144 bit as upper limit.
  1190. Setting the peak bitrate equal to ::AACENC_BITRATE does not
  1191. necessarily mean that the audio frames will be of constant
  1192. size. Since the peak bitrate is in bits/second, the frame sizes
  1193. can vary by one byte in one or the other direction over various
  1194. frames. However, it is not recommended to reduce the peak
  1195. bitrate to ::AACENC_BITRATE - it would disable the
  1196. bitreservoir, which would affect the audio quality by a large
  1197. amount. */
  1198. AACENC_TRANSMUX = 0x0300, /*!< Transport type to be used. See ::TRANSPORT_TYPE
  1199. in FDK_audio.h. Following types can be configured
  1200. in encoder library:
  1201. - 0: raw access units
  1202. - 1: ADIF bitstream format
  1203. - 2: ADTS bitstream format
  1204. - 6: Audio Mux Elements (LATM) with
  1205. muxConfigPresent = 1
  1206. - 7: Audio Mux Elements (LATM) with
  1207. muxConfigPresent = 0, out of band StreamMuxConfig
  1208. - 10: Audio Sync Stream (LOAS) */
  1209. AACENC_HEADER_PERIOD =
  1210. 0x0301, /*!< Frame count period for sending in-band configuration buffers
  1211. within LATM/LOAS transport layer. Additionally this parameter
  1212. configures the PCE repetition period in raw_data_block(). See
  1213. \ref encPCE.
  1214. - 0xFF: auto-mode default 10 for TT_MP4_ADTS, TT_MP4_LOAS and
  1215. TT_MP4_LATM_MCP1, otherwise 0.
  1216. - n: Frame count period. */
  1217. AACENC_SIGNALING_MODE =
  1218. 0x0302, /*!< Signaling mode of the extension AOT:
  1219. - 0: Implicit backward compatible signaling (default for
  1220. non-MPEG-4 based AOTs and for the transport formats ADIF and
  1221. ADTS)
  1222. - A stream that uses implicit signaling can be decoded
  1223. by every AAC decoder, even AAC-LC-only decoders
  1224. - An AAC-LC-only decoder will only decode the
  1225. low-frequency part of the stream, resulting in a band-limited
  1226. output
  1227. - This method works with all transport formats
  1228. - This method does not work with downsampled SBR
  1229. - 1: Explicit backward compatible signaling
  1230. - A stream that uses explicit backward compatible
  1231. signaling can be decoded by every AAC decoder, even AAC-LC-only
  1232. decoders
  1233. - An AAC-LC-only decoder will only decode the
  1234. low-frequency part of the stream, resulting in a band-limited
  1235. output
  1236. - A decoder not capable of decoding PS will only decode
  1237. the AAC-LC+SBR part. If the stream contained PS, the result
  1238. will be a decoded mono downmix
  1239. - This method does not work with ADIF or ADTS. For
  1240. LOAS/LATM, it only works with AudioMuxVersion==1
  1241. - This method does work with downsampled SBR
  1242. - 2: Explicit hierarchical signaling (default for MPEG-4
  1243. based AOTs and for all transport formats excluding ADIF and
  1244. ADTS)
  1245. - A stream that uses explicit hierarchical signaling can
  1246. be decoded only by HE-AAC decoders
  1247. - An AAC-LC-only decoder will not decode a stream that
  1248. uses explicit hierarchical signaling
  1249. - A decoder not capable of decoding PS will not decode
  1250. the stream at all if it contained PS
  1251. - This method does not work with ADIF or ADTS. It works
  1252. with LOAS/LATM and the MPEG-4 File format
  1253. - This method does work with downsampled SBR
  1254. For making sure that the listener always experiences the
  1255. best audio quality, explicit hierarchical signaling should be
  1256. used. This makes sure that only a full HE-AAC-capable decoder
  1257. will decode those streams. The audio is played at full
  1258. bandwidth. For best backwards compatibility, it is recommended
  1259. to encode with implicit SBR signaling. A decoder capable of
  1260. AAC-LC only will then only decode the AAC part, which means the
  1261. decoded audio will sound band-limited.
  1262. For MPEG-2 transport types (ADTS,ADIF), only implicit
  1263. signaling is possible.
  1264. For LOAS and LATM, explicit backwards compatible signaling
  1265. only works together with AudioMuxVersion==1. The reason is
  1266. that, for explicit backwards compatible signaling, additional
  1267. information will be appended to the ASC. A decoder that is only
  1268. capable of decoding AAC-LC will skip this part. Nevertheless,
  1269. for jumping to the end of the ASC, it needs to know the ASC
  1270. length. Transmitting the length of the ASC is a feature of
  1271. AudioMuxVersion==1, it is not possible to transmit the length
  1272. of the ASC with AudioMuxVersion==0, therefore an AAC-LC-only
  1273. decoder will not be able to parse a LOAS/LATM stream that was
  1274. being encoded with AudioMuxVersion==0.
  1275. For downsampled SBR, explicit signaling is mandatory. The
  1276. reason for this is that the extension sampling frequency (which
  1277. is in case of SBR the sampling frequency of the SBR part) can
  1278. only be signaled in explicit mode.
  1279. For AAC-ELD, the SBR information is transmitted in the
  1280. ELDSpecific Config, which is part of the AudioSpecificConfig.
  1281. Therefore, the settings here will have no effect on AAC-ELD.*/
  1282. AACENC_TPSUBFRAMES =
  1283. 0x0303, /*!< Number of sub frames in a transport frame for LOAS/LATM or
  1284. ADTS (default 1).
  1285. - ADTS: Maximum number of sub frames restricted to 4.
  1286. - LOAS/LATM: Maximum number of sub frames restricted to 2.*/
  1287. AACENC_AUDIOMUXVER =
  1288. 0x0304, /*!< AudioMuxVersion to be used for LATM. (AudioMuxVersionA,
  1289. currently not implemented):
  1290. - 0: Default, no transmission of tara Buffer fullness, no ASC
  1291. length and including actual latm Buffer fullness.
  1292. - 1: Transmission of tara Buffer fullness, ASC length and
  1293. actual latm Buffer fullness.
  1294. - 2: Transmission of tara Buffer fullness, ASC length and
  1295. maximum level of latm Buffer fullness. */
  1296. AACENC_PROTECTION = 0x0306, /*!< Configure protection in transport layer:
  1297. - 0: No protection. (default)
  1298. - 1: CRC active for ADTS transport format. */
  1299. AACENC_ANCILLARY_BITRATE =
  1300. 0x0500, /*!< Constant ancillary data bitrate in bits/second.
  1301. - 0: Either no ancillary data or insert exact number of
  1302. bytes, denoted via input parameter, numAncBytes in
  1303. AACENC_InArgs.
  1304. - else: Insert ancillary data with specified bitrate. */
  1305. AACENC_METADATA_MODE = 0x0600, /*!< Configure Meta Data. See ::AACENC_MetaData
  1306. for further details:
  1307. - 0: Do not embed any metadata.
  1308. - 1: Embed dynamic_range_info metadata.
  1309. - 2: Embed dynamic_range_info and
  1310. ancillary_data metadata.
  1311. - 3: Embed ancillary_data metadata. */
  1312. AACENC_CONTROL_STATE =
  1313. 0xFF00, /*!< There is an automatic process which internally reconfigures
  1314. the encoder instance when a configuration parameter changed or
  1315. an error occurred. This parameter allows overwriting or getting
  1316. the control status of this process. See ::AACENC_CTRLFLAGS. */
  1317. AACENC_NONE = 0xFFFF /*!< ------ */
  1318. } AACENC_PARAM;
  1319. #ifdef __cplusplus
  1320. extern "C" {
  1321. #endif
  1322. /**
  1323. * \brief Open an instance of the encoder.
  1324. *
  1325. * Allocate memory for an encoder instance with a functional range denoted by
  1326. * the function parameters. Preinitialize encoder instance with default
  1327. * configuration.
  1328. *
  1329. * \param phAacEncoder A pointer to an encoder handle. Initialized on return.
  1330. * \param encModules Specify encoder modules to be supported in this encoder
  1331. * instance:
  1332. * - 0x0: Allocate memory for all available encoder
  1333. * modules.
  1334. * - else: Select memory allocation regarding encoder
  1335. * modules. Following flags are possible and can be combined.
  1336. * - 0x01: AAC module.
  1337. * - 0x02: SBR module.
  1338. * - 0x04: PS module.
  1339. * - 0x08: MPS module.
  1340. * - 0x10: Metadata module.
  1341. * - example: (0x01|0x02|0x04|0x08|0x10) allocates
  1342. * all modules and is equivalent to default configuration denoted by 0x0.
  1343. * \param maxChannels Number of channels to be allocated. This parameter can
  1344. * be used in different ways:
  1345. * - 0: Allocate maximum number of AAC and SBR channels as
  1346. * supported by the library.
  1347. * - nChannels: Use same maximum number of channels for
  1348. * allocating memory in AAC and SBR module.
  1349. * - nChannels | (nSbrCh<<8): Number of SBR channels can be
  1350. * different to AAC channels to save data memory.
  1351. *
  1352. * \return
  1353. * - AACENC_OK, on success.
  1354. * - AACENC_INVALID_HANDLE, AACENC_MEMORY_ERROR, AACENC_INVALID_CONFIG,
  1355. * on failure.
  1356. */
  1357. AACENC_ERROR aacEncOpen(HANDLE_AACENCODER *phAacEncoder, const UINT encModules,
  1358. const UINT maxChannels);
  1359. /**
  1360. * \brief Close the encoder instance.
  1361. *
  1362. * Deallocate the encoder instance and free all of its memory.
  1363. *
  1364. * \param phAacEncoder Pointer to the encoder handle to be deallocated.
  1365. *
  1366. * \return
  1367. * - AACENC_OK, on success.
  1368. * - AACENC_INVALID_HANDLE, on failure.
  1369. */
  1370. AACENC_ERROR aacEncClose(HANDLE_AACENCODER *phAacEncoder);
  1371. /**
  1372. * \brief Encode audio data.
  1373. *
  1374. * This function is mainly for encoding audio data. In addition the function can
  1375. * be used for an encoder (re)configuration process.
  1376. * - PCM input data will be retrieved from external input buffer until the fill
  1377. * level allows encoding a single frame. This functionality allows an external
  1378. * buffer with reduced size in comparison to the AAC or HE-AAC audio frame
  1379. * length.
  1380. * - If the value of the input samples argument is zero, just internal
  1381. * reinitialization will be applied if it is requested.
  1382. * - At the end of a file the flushing process can be triggered via setting the
  1383. * value of the input samples argument to -1. The encoder delay lines are fully
  1384. * flushed when the encoder returns no valid bitstream data
  1385. * AACENC_OutArgs::numOutBytes. Furthermore the end of file is signaled by the
  1386. * return value AACENC_ENCODE_EOF.
  1387. * - If an error occurred in the previous frame or any of the encoder parameters
  1388. * changed, an internal reinitialization process will be applied before encoding
  1389. * the incoming audio samples.
  1390. * - The function can also be used for an independent reconfiguration process
  1391. * without encoding. The first parameter has to be a valid encoder handle and
  1392. * all other parameters can be set to NULL.
  1393. * - If the size of the external bitbuffer in outBufDesc is not sufficient for
  1394. * writing the whole bitstream, an internal error will be the return value and a
  1395. * reconfiguration will be triggered.
  1396. *
  1397. * \param hAacEncoder A valid AAC encoder handle.
  1398. * \param inBufDesc Input buffer descriptor, see AACENC_BufDesc:
  1399. * - At least one input buffer with audio data is
  1400. * expected.
  1401. * - Optionally a second input buffer with
  1402. * ancillary data can be fed.
  1403. * \param outBufDesc Output buffer descriptor, see AACENC_BufDesc:
  1404. * - Provide one output buffer for the encoded
  1405. * bitstream.
  1406. * \param inargs Input arguments, see AACENC_InArgs.
  1407. * \param outargs Output arguments, see AACENC_OutArgs.
  1408. *
  1409. * \return
  1410. * - AACENC_OK, on success.
  1411. * - AACENC_INVALID_HANDLE, AACENC_ENCODE_ERROR, on failure in encoding
  1412. * process.
  1413. * - AACENC_INVALID_CONFIG, AACENC_INIT_ERROR, AACENC_INIT_AAC_ERROR,
  1414. * AACENC_INIT_SBR_ERROR, AACENC_INIT_TP_ERROR, AACENC_INIT_META_ERROR,
  1415. * AACENC_INIT_MPS_ERROR, on failure in encoder initialization.
  1416. * - AACENC_UNSUPPORTED_PARAMETER, on incorrect input or output buffer
  1417. * descriptor initialization.
  1418. * - AACENC_ENCODE_EOF, when flushing fully concluded.
  1419. */
  1420. AACENC_ERROR aacEncEncode(const HANDLE_AACENCODER hAacEncoder,
  1421. const AACENC_BufDesc *inBufDesc,
  1422. const AACENC_BufDesc *outBufDesc,
  1423. const AACENC_InArgs *inargs, AACENC_OutArgs *outargs);
  1424. /**
  1425. * \brief Acquire info about present encoder instance.
  1426. *
  1427. * This function retrieves information of the encoder configuration. In addition
  1428. * to informative internal states, a configuration data block of the current
  1429. * encoder settings will be returned. The format is either Audio Specific Config
  1430. * in case of Raw Packets transport format or StreamMuxConfig in case of
  1431. * LOAS/LATM transport format. The configuration data block is binary coded as
  1432. * specified in ISO/IEC 14496-3 (MPEG-4 audio), to be used directly for MPEG-4
  1433. * File Format or RFC3016 or RFC3640 applications.
  1434. *
  1435. * \param hAacEncoder A valid AAC encoder handle.
  1436. * \param pInfo Pointer to AACENC_InfoStruct. Filled on return.
  1437. *
  1438. * \return
  1439. * - AACENC_OK, on success.
  1440. * - AACENC_INVALID_HANDLE, AACENC_INIT_ERROR, on failure.
  1441. */
  1442. AACENC_ERROR aacEncInfo(const HANDLE_AACENCODER hAacEncoder,
  1443. AACENC_InfoStruct *pInfo);
  1444. /**
  1445. * \brief Set one single AAC encoder parameter.
  1446. *
  1447. * This function allows configuration of all encoder parameters specified in
  1448. * ::AACENC_PARAM. Each parameter must be set with a separate function call. An
  1449. * internal validation of the configuration value range will be done and an
  1450. * internal reconfiguration will be signaled. The new configuration is actually
  1451. * applied as part of the subsequent aacEncEncode() call.
  1452. *
  1453. * \param hAacEncoder A valid AAC encoder handle.
  1454. * \param param Parameter to be set. See ::AACENC_PARAM.
  1455. * \param value Parameter value. See parameter description in
  1456. * ::AACENC_PARAM.
  1457. *
  1458. * \return
  1459. * - AACENC_OK, on success.
  1460. * - AACENC_INVALID_HANDLE, AACENC_UNSUPPORTED_PARAMETER,
  1461. * AACENC_INVALID_CONFIG, on failure.
  1462. */
  1463. AACENC_ERROR aacEncoder_SetParam(const HANDLE_AACENCODER hAacEncoder,
  1464. const AACENC_PARAM param, const UINT value);
  1465. /**
  1466. * \brief Get one single AAC encoder parameter.
  1467. *
  1468. * This function is the complement to aacEncoder_SetParam(). After encoder
  1469. * reinitialization with user defined settings, the internal status of each
  1470. * parameter specified with ::AACENC_PARAM can be obtained.
  1471. *
  1472. * \param hAacEncoder A valid AAC encoder handle.
  1473. * \param param Parameter to be returned. See ::AACENC_PARAM.
  1474. *
  1475. * \return Internal configuration value of specified parameter ::AACENC_PARAM.
  1476. */
  1477. UINT aacEncoder_GetParam(const HANDLE_AACENCODER hAacEncoder,
  1478. const AACENC_PARAM param);
  1479. /**
  1480. * \brief Get information about the encoder library build.
  1481. *
  1482. * Fill a given LIB_INFO structure with library version information.
  1483. *
  1484. * \param info Pointer to an allocated LIB_INFO struct.
  1485. *
  1486. * \return
  1487. * - AACENC_OK, on success.
  1488. * - AACENC_INVALID_HANDLE, AACENC_INIT_ERROR, on failure.
  1489. */
  1490. AACENC_ERROR aacEncGetLibInfo(LIB_INFO *info);
  1491. #ifdef __cplusplus
  1492. }
  1493. #endif
  1494. #endif /* AACENC_LIB_H */