Codebase list mupen64plus-rsp-hle / f1f96cd
Add initial support for MusyX Sven Eckelmann 10 years ago
3 changed file(s) with 893 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
77 - Add jpeg_memory_alloc_stack.patch, Refactor standard macroblock decoder
88 - Add jpeg_memory_alloc_refactor.patch, Avoid dynamic memory allocation for
99 macroblocks
10 - Add musyx_initial.patch, Add initial support for MusyX
1011 * debian/watch:
1112 - Change upstream check from bitbucket to github
1213 * debian/copyright:
0 Description: Add initial support for MusyX
1 Author: Bobby Smiles <bobby.smiles32@gmail.com>
2
3 ---
4 diff --git a/projects/msvc11/mupen64plus-rsp-hle.vcxproj b/projects/msvc11/mupen64plus-rsp-hle.vcxproj
5 index 38c90592d48dba147723ba96e6c2d6f4e218ad82..b8e9409a9b72b3dabbd0a79eaafb0cdef0c4c3e5 100644
6 --- a/projects/msvc11/mupen64plus-rsp-hle.vcxproj
7 +++ b/projects/msvc11/mupen64plus-rsp-hle.vcxproj
8 @@ -96,6 +96,7 @@
9 <ClCompile Include="..\..\src\cicx105.c" />
10 <ClCompile Include="..\..\src\jpeg.c" />
11 <ClCompile Include="..\..\src\main.c" />
12 + <ClCompile Include="..\..\src\musyx.c" />
13 <ClCompile Include="..\..\src\ucode1.cpp" />
14 <ClCompile Include="..\..\src\ucode2.cpp" />
15 <ClCompile Include="..\..\src\ucode3.cpp" />
16 @@ -107,8 +108,9 @@
17 <ClInclude Include="..\..\src\cicx105.h" />
18 <ClInclude Include="..\..\src\hle.h" />
19 <ClInclude Include="..\..\src\jpeg.h" />
20 + <ClInclude Include="..\..\src\musyx.h" />
21 </ItemGroup>
22 <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
23 <ImportGroup Label="ExtensionTargets">
24 </ImportGroup>
25 -</Project>
26 \ No newline at end of file
27 +</Project>
28 diff --git a/projects/msvc8/mupen64plus-rsp-hle.vcproj b/projects/msvc8/mupen64plus-rsp-hle.vcproj
29 index 1b9522852cbcc8ca09eb027bee68c53924121322..f820a3718e95cd3f584cc12278bb5b5729ca9d6d 100644
30 --- a/projects/msvc8/mupen64plus-rsp-hle.vcproj
31 +++ b/projects/msvc8/mupen64plus-rsp-hle.vcproj
32 @@ -195,6 +195,10 @@
33 >
34 </File>
35 <File
36 + RelativePath="..\..\src\musyx.c"
37 + >
38 + </File>
39 + <File
40 RelativePath="..\..\src\ucode1.cpp"
41 >
42 </File>
43 @@ -236,6 +240,10 @@
44 RelativePath="..\..\src\jpeg.h"
45 >
46 </File>
47 + <File
48 + RelativePath="..\..\src\musyx.h"
49 + >
50 + </File>
51 </Filter>
52 </Files>
53 <Globals>
54 diff --git a/projects/unix/Makefile b/projects/unix/Makefile
55 index a6cef2318a77a32655cdba5f03a0d2fa231c0cf9..af975a6a545a261e340cdec5b9dfca109dfdf6bf 100644
56 --- a/projects/unix/Makefile
57 +++ b/projects/unix/Makefile
58 @@ -242,6 +242,7 @@ SOURCE = \
59 $(SRCDIR)/alist.c \
60 $(SRCDIR)/cicx105.c \
61 $(SRCDIR)/jpeg.c \
62 + $(SRCDIR)/musyx.c \
63 $(SRCDIR)/ucode3.cpp \
64 $(SRCDIR)/ucode2.cpp \
65 $(SRCDIR)/ucode1.cpp \
66 diff --git a/src/main.c b/src/main.c
67 index 396321f77680acc9126fa7d17c09e823ce9c67c5..5c9b79fea971638ff7806e3267c85cdf36830630 100644
68 --- a/src/main.c
69 +++ b/src/main.c
70 @@ -33,6 +33,7 @@
71 #include "alist.h"
72 #include "cicx105.h"
73 #include "jpeg.h"
74 +#include "musyx.h"
75
76 #define min(a,b) (((a) < (b)) ? (a) : (b))
77
78 @@ -166,8 +167,7 @@ static int try_fast_audio_dispatching()
79 * GauntletLegend, Rush2049, IndianaJones, BattleForNaboo
80 * TODO: implement ucode
81 **/
82 - DebugMessage(M64MSG_WARNING, "MusyX ucode not implemented.");
83 - /* return 1; */
84 + musyx_task(); return 1;
85 }
86 else
87 {
88 diff --git a/src/musyx.c b/src/musyx.c
89 new file mode 100644
90 index 0000000000000000000000000000000000000000..b3dd98dd2e5d6e511f9eca25c3bd92c3d59d3c3f
91 --- /dev/null
92 +++ b/src/musyx.c
93 @@ -0,0 +1,763 @@
94 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
95 + * Mupen64plus-rsp-hle - musyx.c *
96 + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
97 + * Copyright (C) 2013 Bobby Smiles *
98 + * *
99 + * This program is free software; you can redistribute it and/or modify *
100 + * it under the terms of the GNU General Public License as published by *
101 + * the Free Software Foundation; either version 2 of the License, or *
102 + * (at your option) any later version. *
103 + * *
104 + * This program is distributed in the hope that it will be useful, *
105 + * but WITHOUT ANY WARRANTY; without even the implied warranty of *
106 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
107 + * GNU General Public License for more details. *
108 + * *
109 + * You should have received a copy of the GNU General Public License *
110 + * along with this program; if not, write to the *
111 + * Free Software Foundation, Inc., *
112 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
113 + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
114 +
115 +#include <stdbool.h>
116 +#include <stdint.h>
117 +#include <string.h>
118 +
119 +extern const uint16_t ResampleLUT[0x200];
120 +
121 +#include "m64p_types.h"
122 +#include "hle.h"
123 +
124 +/* various constants */
125 +enum { SUBFRAME_SIZE = 192 }; /* number of int16 samples in each internal subframe buffer */
126 +enum { MAX_VOICES = 32 }; /* voice records per SFD entry; also the number of voice bitmask bits scanned */
127 +
128 +enum { SAMPLE_BUFFER_SIZE = 0x200 }; /* int16 entries in the per-voice sample buffer */
129 +
130 +/* Byte offsets of the fields musyx_task() reads from each SFD entry */
131 +enum
132 +{
133 + SFD_VOICE_COUNT = 0x0, /* u16; its read is commented out in musyx_task() */
134 + SFD_SFX_INDEX = 0x2, /* u16 */
135 + SFD_VOICE_BITMASK = 0x4, /* u32, one bit per voice */
136 + SFD_STATE_PTR = 0x8, /* u32 DRAM address of the state block */
137 + SFD_SFX_PTR = 0xc, /* u32; 0 makes sfx_stage() return early */
138 +
139 + SFD_VOICES = 0x10 /* first voice record; the next SFD entry starts MAX_VOICES * VOICE_SIZE bytes later */
140 +};
141 +/* Byte offsets inside one voice record (records are VOICE_SIZE bytes apart) */
142 +enum
143 +{
144 + VOICE_ENV_BEGIN = 0x00, /* 4 x 32-bit envelope start values */
145 + VOICE_ENV_STEP = 0x10, /* 4 x 32-bit per-sample envelope increments */
146 + VOICE_PITCH_Q16 = 0x20, /* u16 initial value of the pitch accumulator */
147 + VOICE_PITCH_SHIFT = 0x22, /* u16, shifted left by 4 to form the pitch step */
148 + VOICE_CATSRC_0 = 0x24, /* CATSRC descriptor loaded at samples + segbase */
149 + VOICE_CATSRC_1 = 0x30, /* CATSRC descriptor loaded at the start of the sample buffer when used */
150 + VOICE_ADPCM_FRAMES = 0x3c, /* two u8 frame counts (+0, +1); a zero first byte selects PCM16 */
151 + VOICE_SKIP_SAMPLES = 0x3e, /* two u8 skip values (+0, +1) */
152 +
153 + /* for PCM16 */
154 + VOICE_U16_40 = 0x40,
155 + VOICE_U16_42 = 0x42,
156 +
157 + /* for ADPCM */
158 + VOICE_ADPCM_TABLE_PTR = 0x40,
159 +
160 + VOICE_INTERLEAVED_PTR = 0x44, /* u32 output address; a non-zero value ends the voice loop */
161 + VOICE_END_POINT = 0x48,
162 + VOICE_RESTART_POINT = 0x4a,
163 + VOICE_U16_4C = 0x4c,
164 + VOICE_U16_4E = 0x4e,
165 +
166 + VOICE_SIZE = 0x50
167 +};
168 +/* Byte offsets inside a CATSRC descriptor: two source pointers and their sizes, copied back to back */
169 +enum
170 +{
171 + CATSRC_PTR1 = 0x00, /* u32 address of the first source */
172 + CATSRC_PTR2 = 0x04, /* u32 address of the second source */
173 + CATSRC_SIZE1 = 0x08, /* u16 size of the first source */
174 + CATSRC_SIZE2 = 0x0a /* u16 size of the second source; 0 skips the second copy */
175 +};
176 +/* Byte offsets inside the state block addressed by SFD_STATE_PTR */
177 +enum
178 +{
179 + STATE_LAST_SAMPLE = 0x0, /* 8 bytes (4 x s16) per voice */
180 + STATE_BASE_VOL = 0x100, /* 4 high u16 halves followed by 4 low u16 halves */
181 + STATE_CC0 = 0x110, /* SUBFRAME_SIZE s16 samples */
182 + STATE_740_LAST4 = 0x290 /* 4 s16 samples */
183 +};
184 +/* Byte offsets inside the SFX parameter block read by sfx_stage() */
185 +enum
186 +{
187 + SFX_CBUFFER_PTR = 0x00, /* u32 */
188 + SFX_CBUFFER_LENGTH = 0x04, /* u32 */
189 + SFX_TAP_COUNT = 0x08, /* u16 */
190 + SFX_FIR4_HGAIN = 0x0a, /* u16 */
191 + SFX_TAP_DELAYS = 0x0c, /* 8 x u32 */
192 + SFX_TAP_GAINS = 0x2c, /* 8 x u16 */
193 + // padding = 0x3c
194 + SFX_FIR4_HCOEFFS = 0x40 /* 4 x u16 */
195 +};
196 +
197 +
198 +/* struct definition: working buffers shared by all stages of one musyx_task() call */
199 +typedef struct
200 +{
201 + /* internal subframes */
202 + int16_t left[SUBFRAME_SIZE];
203 + int16_t right[SUBFRAME_SIZE];
204 + int16_t cc0[SUBFRAME_SIZE]; /* loaded from / written back to STATE_CC0 */
205 + int16_t e50[SUBFRAME_SIZE];
206 +
207 + /* internal subframes base volumes, in the order left, right, cc0, e50 */
208 + int32_t base_vol[4];
209 +
210 + /* 4 samples loaded from / written back to STATE_740_LAST4 by musyx_task() */
211 + int16_t subframe_740_last4[4];
212 +} musyx_t;
213 +
214 +/* helper functions prototypes */
215 +static void load_base_vol(int32_t* base_vol, uint32_t address);
216 +static void save_base_vol(const int32_t* base_vol, uint32_t address);
217 +static void update_base_vol(int32_t* base_vol, uint32_t voice_mask, uint32_t last_sample_ptr);
218 +
219 +static void init_subframes(musyx_t* musyx);
220 +
221 +static uint32_t voice_stage(musyx_t* musyx, uint32_t voice_ptr, uint32_t last_sample_ptr);
222 +
223 +static void dma_cat8(uint8_t* dst, uint32_t catsrc_ptr);
224 +static void dma_cat16(uint16_t* dst, uint32_t catsrc_ptr);
225 +
226 +static void load_samples_PCM16(uint32_t voice_ptr, int16_t* samples, unsigned* segbase, unsigned* offset);
227 +static void load_samples_ADPCM(uint32_t voice_ptr, int16_t* samples, unsigned* segbase, unsigned* offset);
228 +
229 +static void adpcm_decode_frames(int16_t* dst, const uint8_t* src, const int16_t* table, uint8_t count, uint8_t skip_samples);
230 +
231 +static int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask, unsigned lshift, unsigned rshift);
232 +static void adpcm_get_predicted_frame(int16_t* dst, const uint8_t* src, const uint8_t* nibbles, unsigned int rshift);
233 +static void adpcm_decode_upto_8_samples(int16_t* dst, const int16_t* src,
234 + const int16_t* cb_entry, const int16_t* last_samples, size_t size);
235 +
236 +static void mix_voice_samples(musyx_t* musyx, uint32_t voice_ptr, const int16_t* samples, unsigned segbase, unsigned offset, uint32_t last_sample_ptr);
237 +
238 +static void sfx_stage(musyx_t* musyx, uint32_t sfx_ptr, uint16_t idx);
239 +
240 +static void interleave_stage(musyx_t* musyx, uint32_t output_ptr);
241 +
242 +/* DRAM access helpers: pointer getters, then bulk load/store loops built on them */
243 +static uint8_t* dram_u8 (uint32_t address);
244 +static uint16_t* dram_u16(uint32_t address);
245 +static uint32_t* dram_u32(uint32_t address);
246 +
247 +static void load_u8(uint8_t* dst, uint32_t address, size_t count);
248 +static void load_u16(uint16_t* dst, uint32_t address, size_t count);
249 +static void load_u32(uint32_t* dst, uint32_t address, size_t count);
250 +
251 +static void store_u16(const uint16_t* src, uint32_t address, size_t count);
252 +/* Saturate a 32-bit value to the int16_t range [-32768, 32767]. */
253 +static inline int16_t clamp_s16(int32_t x)
254 +{
255 + if (x > 32767) { x = 32767; } else if (x < -32768) { x = -32768; }
256 + return x;
257 +}
258 +/* Round x up to a multiple of amount; the mask arithmetic is only correct when amount is a power of two. */
259 +static inline unsigned int align(unsigned int x, unsigned amount)
260 +{
261 + --amount; /* amount now holds the low-bit mask */
262 + return (x + amount) & ~amount;
263 +}
264 +/* Dot product of x[0..n-1] with y[0..n-1] taken in reverse: x[0]*y[n-1] + x[1]*y[n-2] + ... ; returns 0 when n is 0. */
265 +static int32_t rdot(size_t n, const int16_t* x, const int16_t* y)
266 +{
267 + int32_t accu = 0;
268 +
269 + y += n; /* one past the last element; pre-decremented before each read */
270 +
271 + while(n != 0)
272 + {
273 + accu += ((int32_t)*(x++) * (int32_t)*(--y));
274 + --n;
275 + }
276 +
277 + return accu;
278 +}
279 +
280 +/* 4-tap dot product: each product is shifted right by 15 and the running sum is saturated to int16 after every tap. */
281 +static int32_t dot4(const int16_t* x, const int16_t* y)
282 +{
283 + size_t i;
284 + int32_t accu = 0;
285 +
286 + for(i = 0; i < 4; ++i)
287 + accu = clamp_s16(accu + (((int32_t)x[i] * (int32_t)y[i]) >> 15));
288 +
289 + return accu;
290 +}
291 +
292 +/* Fast and dirty way of reading dram memory
293 + * Assume properly aligned access
294 + */
295 +uint8_t* dram_u8(uint32_t address)
296 +{
297 + return (uint8_t*)&rsp.RDRAM[(address & 0xffffff) ^ S8]; /* keep the low 24 address bits, then XOR with S8 (defined outside this file) */
298 +}
299 +/* Pointer to the 16-bit value at a DRAM address; no alignment check is made. */
300 +uint16_t* dram_u16(uint32_t address)
301 +{
302 + return (uint16_t*)&rsp.RDRAM[(address & 0xffffff) ^ S16]; /* keep the low 24 address bits, then XOR with S16 (defined outside this file) */
303 +}
304 +/* Pointer to the 32-bit value at a DRAM address; no alignment check is made. */
305 +uint32_t* dram_u32(uint32_t address)
306 +{
307 + return (uint32_t*)&rsp.RDRAM[address & 0xffffff]; /* only masked to 24 bits, no XOR applied */
308 +}
309 +/* Copy count bytes from DRAM, starting at address, into dst, one dram_u8() access per byte. */
310 +void load_u8(uint8_t* dst, uint32_t address, size_t count)
311 +{
312 + while(count != 0)
313 + {
314 + *(dst++) = *dram_u8(address); address += 1;
315 + --count;
316 + }
317 +}
318 +/* Copy count 16-bit values from DRAM, starting at address, into dst, one dram_u16() access per value. */
319 +void load_u16(uint16_t* dst, uint32_t address, size_t count)
320 +{
321 + while(count != 0)
322 + {
323 + *(dst++) = *dram_u16(address); address += 2;
324 + --count;
325 + }
326 +}
327 +/* Copy count 32-bit values from DRAM, starting at address, into dst with a single memcpy. */
328 +void load_u32(uint32_t* dst, uint32_t address, size_t count)
329 +{
330 + /* Optimization for uint32_t */
331 + const uint32_t* src = dram_u32(address);
332 +
333 + memcpy(dst, src, count * sizeof(uint32_t)); /* only the start address is masked; the copy itself is one contiguous run */
334 +}
335 +/* Write count 16-bit values from src into DRAM, starting at address, one dram_u16() access per value. */
336 +void store_u16(const uint16_t* src, uint32_t address, size_t count)
337 +{
338 + while(count != 0)
339 + {
340 + *dram_u16(address) = *(src++); address += 2;
341 + --count;
342 + }
343 +}
344 +
345 +/**************************************************************************
346 + * MusyX audio ucode: walks data_size SFD entries starting at data_ptr
347 + **************************************************************************/
348 +void musyx_task()
349 +{
350 + const OSTask_t * const task = get_task();
351 +
352 + uint32_t sfd_ptr = task->data_ptr; /* address of the first SFD entry */
353 + uint32_t sfd_count = task->data_size; /* used as the number of SFD entries to process */
354 +
355 + DebugMessage(M64MSG_VERBOSE, "musyx_task: *data=%x, #SF=%d",
356 + sfd_ptr,
357 + sfd_count);
358 +
359 + uint32_t state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR);
360 +
361 + musyx_t musyx;
362 +
363 + /* load initial state */
364 + load_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL);
365 + load_u16((uint16_t*)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE);
366 + load_u16((uint16_t*)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4, 4);
367 +
368 + for(;;)
369 + {
370 + /* parse SFD structure */
371 + /*uint16_t voice_count = *dram_u16(sfd_ptr + SFD_VOICE_COUNT);*/
372 + uint16_t sfx_index = *dram_u16(sfd_ptr + SFD_SFX_INDEX);
373 + uint32_t voice_mask = *dram_u32(sfd_ptr + SFD_VOICE_BITMASK);
374 + uint32_t sfx_ptr = *dram_u32(sfd_ptr + SFD_SFX_PTR);
375 + uint32_t voice_ptr = sfd_ptr + SFD_VOICES;
376 + uint32_t last_sample_ptr = state_ptr + STATE_LAST_SAMPLE;
377 +
378 + /* initialize internal subframes using updated base volumes */
379 + update_base_vol(musyx.base_vol, voice_mask, last_sample_ptr);
380 + init_subframes(&musyx);
381 +
382 + /* active voices get mixed into L,R,cc0,e50 subframes (optional) */
383 + uint32_t output_ptr = voice_stage(&musyx, voice_ptr, last_sample_ptr);
384 +
385 + /* apply delay-based effects (optional) */
386 + sfx_stage(&musyx, sfx_ptr, sfx_index);
387 +
388 + /* emit interleaved L,R subframes */
389 + interleave_stage(&musyx, output_ptr);
390 +
391 + --sfd_count; /* NOTE(review): decremented before the test, so a data_size of 0 would wrap instead of exiting - confirm 0 never occurs */
392 + if (sfd_count == 0)
393 + break;
394 +
395 + sfd_ptr += SFD_VOICES + MAX_VOICES * VOICE_SIZE; /* step to the next SFD entry */
396 + state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR); /* state address is re-read per entry; the in-memory state is not reloaded */
397 + }
398 +
399 + /* writeback updated state, to the state block of the last SFD entry processed */
400 + save_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL);
401 + store_u16((uint16_t*)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE);
402 + store_u16((uint16_t*)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4, 4);
403 +}
404 +/* Read 4 base volumes: entry k takes its high half from address + 2k and its low half from address + 8 + 2k. */
405 +void load_base_vol(int32_t* base_vol, uint32_t address)
406 +{
407 + base_vol[0] = ((uint32_t)(*dram_u16(address )) << 16) | (*dram_u16(address + 8));
408 + base_vol[1] = ((uint32_t)(*dram_u16(address + 2)) << 16) | (*dram_u16(address + 10));
409 + base_vol[2] = ((uint32_t)(*dram_u16(address + 4)) << 16) | (*dram_u16(address + 12));
410 + base_vol[3] = ((uint32_t)(*dram_u16(address + 6)) << 16) | (*dram_u16(address + 14));
411 +}
412 +/* Write 4 base volumes in the layout load_base_vol() reads: the 4 high halves first, then the 4 low halves. */
413 +void save_base_vol(const int32_t* base_vol, uint32_t address)
414 +{
415 + unsigned k;
416 +
417 + for(k = 0; k < 4; ++k) /* high 16 bits of each entry */
418 + {
419 + *dram_u16(address) = (uint16_t)(base_vol[k] >> 16); address += 2;
420 + }
421 +
422 + for(k = 0; k < 4; ++k) /* low 16 bits of each entry */
423 + {
424 + *dram_u16(address) = (uint16_t)(base_vol[k]); address += 2;
425 + }
426 +}
427 +/* Add the 4 stored last samples of every voice whose bit is set in voice_mask to base_vol, then scale all 4 entries down. */
428 +void update_base_vol(int32_t* base_vol, uint32_t voice_mask, uint32_t last_sample_ptr)
429 +{
430 + unsigned i, k;
431 + uint32_t mask;
432 +
433 + DebugMessage(M64MSG_VERBOSE, "base_vol voice_mask = %08x", voice_mask);
434 + DebugMessage(M64MSG_VERBOSE, "BEFORE: base_vol = %08x %08x %08x %08x",
435 + base_vol[0], base_vol[1], base_vol[2], base_vol[3]);
436 +
437 + /* optim: skip voices contributions entirely if voice_mask is empty */
438 + if (voice_mask != 0)
439 + {
440 + for(i = 0, mask = 1; i < MAX_VOICES; ++i, mask <<= 1, last_sample_ptr += 8) /* 8 bytes of last samples per voice */
441 + {
442 + if ((voice_mask & mask) == 0)
443 + continue;
444 +
445 + for(k = 0; k < 4; ++k)
446 + base_vol[k] += (int16_t)*dram_u16(last_sample_ptr + k * 2); /* stored value is reinterpreted as signed */
447 + }
448 + }
449 +
450 + /* apply 3% decay */
451 + for(k = 0; k < 4; ++k)
452 + {
453 + base_vol[k] = (base_vol[k] * 0x0000f850) >> 16; /* NOTE(review): 32-bit product; confirm base_vol stays small enough not to overflow */
454 + }
455 +
456 + DebugMessage(M64MSG_VERBOSE, "AFTER: base_vol = %08x %08x %08x %08x",
457 + base_vol[0], base_vol[1], base_vol[2], base_vol[3]);
458 +}
459 +/* Seed the four subframes for a new pass: left/right from the carried-over cc0 plus its base volume, e50 from its base volume. */
460 +void init_subframes(musyx_t* musyx)
461 +{
462 + unsigned i;
463 +
464 + int16_t base_cc0 = clamp_s16(musyx->base_vol[2]);
465 + int16_t base_e50 = clamp_s16(musyx->base_vol[3]);
466 +
467 + int16_t* left = musyx->left;
468 + int16_t* right = musyx->right;
469 + int16_t* cc0 = musyx->cc0;
470 + int16_t* e50 = musyx->e50;
471 +
472 + for(i = 0; i < SUBFRAME_SIZE; ++i)
473 + {
474 + *(e50++) = base_e50;
475 + *(left++) = clamp_s16( *cc0 + base_cc0);
476 + *(right++) = clamp_s16(-*cc0 - base_cc0); /* right receives the negated value */
477 + *(cc0++) = 0; /* cc0 is cleared once it has been consumed */
478 + }
479 +}
480 +
481 +/* Process voices, and returns interleaved subframe destination address */
482 +uint32_t voice_stage(musyx_t* musyx, uint32_t voice_ptr, uint32_t last_sample_ptr)
483 +{
484 + uint32_t output_ptr;
485 + int i = 0; /* index of the voice being processed */
486 +
487 + /* voice stage can be skipped if first voice has no samples */
488 + if (*dram_u16(voice_ptr + VOICE_CATSRC_0 + CATSRC_SIZE1) == 0)
489 + {
490 + DebugMessage(M64MSG_VERBOSE, "Skipping Voice stage");
491 + output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR);
492 + }
493 + /* otherwise process voices until a non null output_ptr is encountered */
494 + else for(;;) /* NOTE(review): no upper bound on i; termination relies on the voice data - confirm it always ends within MAX_VOICES */
495 + {
496 + DebugMessage(M64MSG_VERBOSE, "Processing Voice #%d", i);
497 +
498 + /* load voice samples (PCM16 or ADPCM) */
499 + int16_t samples[SAMPLE_BUFFER_SIZE];
500 + unsigned segbase;
501 + unsigned offset;
502 +
503 + if (*dram_u8(voice_ptr + VOICE_ADPCM_FRAMES) == 0) /* zero frame count selects the PCM16 loader */
504 + {
505 + load_samples_PCM16(voice_ptr, samples, &segbase, &offset);
506 + }
507 + else
508 + {
509 + load_samples_ADPCM(voice_ptr, samples, &segbase, &offset);
510 + }
511 +
512 + /* mix them with each internal subframes */
513 + mix_voice_samples(musyx, voice_ptr, samples, segbase, offset, last_sample_ptr + i * 8);
514 +
515 + /* check break condition */
516 + output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR);
517 + if (output_ptr != 0) break;
518 +
519 + /* next voice */
520 + ++i;
521 + voice_ptr += VOICE_SIZE;
522 + }
523 +
524 + return output_ptr;
525 +}
526 +/* Copy the two byte ranges described by the CATSRC descriptor at catsrc_ptr into dst, the second right after the first. */
527 +void dma_cat8(uint8_t* dst, uint32_t catsrc_ptr)
528 +{
529 + uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1);
530 + uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2);
531 + uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1);
532 + uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2);
533 +
534 + DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x",
535 + ptr1,
536 + ptr2,
537 + size1,
538 + size2);
539 +
540 + size_t count1 = size1; /* sizes are used directly as byte counts */
541 + size_t count2 = size2;
542 +
543 + load_u8(dst, ptr1, count1);
544 +
545 + if (size2 == 0)
546 + return;
547 +
548 + load_u8(dst + count1, ptr2, count2);
549 +}
550 +/* 16-bit variant of dma_cat8(): the descriptor sizes are halved to get the number of 16-bit values to copy. */
551 +void dma_cat16(uint16_t* dst, uint32_t catsrc_ptr)
552 +{
553 + uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1);
554 + uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2);
555 + uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1);
556 + uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2);
557 +
558 + DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x",
559 + ptr1,
560 + ptr2,
561 + size1,
562 + size2);
563 +
564 + size_t count1 = size1 >> 1; /* size / 2 = number of 16-bit values */
565 + size_t count2 = size2 >> 1;
566 +
567 + load_u16(dst, ptr1, count1);
568 +
569 + if (size2 == 0)
570 + return;
571 +
572 + load_u16(dst + count1, ptr2, count2);
573 +}
574 +/* Load a voice's PCM16 samples so they end at the end of samples[]; reports the start index (segbase) and skip count (offset). */
575 +void load_samples_PCM16(uint32_t voice_ptr, int16_t* samples, unsigned* segbase, unsigned* offset)
576 +{
577 + DebugMessage(M64MSG_VERBOSE, "Format: PCM16");
578 +
579 + uint8_t u8_3e = *dram_u8 (voice_ptr + VOICE_SKIP_SAMPLES);
580 + uint16_t u16_40 = *dram_u16(voice_ptr + VOICE_U16_40);
581 + uint16_t u16_42 = *dram_u16(voice_ptr + VOICE_U16_42);
582 +
583 + unsigned count = align(u16_40 + u8_3e, 4); /* rounded up to a multiple of 4 */
584 +
585 + *segbase = SAMPLE_BUFFER_SIZE - count; /* NOTE(review): assumes count <= SAMPLE_BUFFER_SIZE; not checked here */
586 + *offset = u8_3e;
587 +
588 + dma_cat16((uint16_t*)samples + *segbase, voice_ptr + VOICE_CATSRC_0);
589 +
590 + if (u16_42 != 0) /* non-zero requests the second source, loaded at the start of the buffer */
591 + {
592 + dma_cat16((uint16_t*)samples, voice_ptr + VOICE_CATSRC_1);
593 + }
594 +}
595 +/* Fetch and decode a voice's ADPCM frames so they end at the end of samples[]; reports segbase and offset like the PCM16 loader. */
596 +void load_samples_ADPCM(uint32_t voice_ptr, int16_t* samples, unsigned* segbase, unsigned* offset)
597 +{
598 + /* decompressed samples cannot exceed 0x400 bytes;
599 + * ADPCM has a compression ratio of 5/16 */
600 + uint8_t buffer[SAMPLE_BUFFER_SIZE * 2 * 5 / 16];
601 + int16_t adpcm_table[128];
602 +
603 + DebugMessage(M64MSG_VERBOSE, "Format: ADPCM");
604 +
605 + uint8_t u8_3c = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES ); /* frame count for CATSRC_0 */
606 + uint8_t u8_3d = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES + 1); /* frame count for CATSRC_1 */
607 + uint8_t u8_3e = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES ); /* skip value for CATSRC_0 */
608 + uint8_t u8_3f = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES + 1); /* skip value for CATSRC_1 */
609 + uint32_t adpcm_table_ptr = *dram_u32(voice_ptr + VOICE_ADPCM_TABLE_PTR);
610 +
611 + DebugMessage(M64MSG_VERBOSE, "Loading ADPCM table: %08x", adpcm_table_ptr);
612 + load_u16((uint16_t*)adpcm_table, adpcm_table_ptr, 128);
613 +
614 + unsigned count = u8_3c << 5; /* 32 decoded samples per frame */
615 +
616 + *segbase = SAMPLE_BUFFER_SIZE - count; /* NOTE(review): assumes count <= SAMPLE_BUFFER_SIZE; not checked here */
617 + *offset = u8_3e & 0x1f;
618 +
619 + dma_cat8(buffer, voice_ptr + VOICE_CATSRC_0); /* NOTE(review): copy size comes from DRAM and is not checked against sizeof(buffer) */
620 + adpcm_decode_frames(samples + *segbase, buffer, adpcm_table, u8_3c, u8_3e);
621 +
622 + if (u8_3d != 0) /* second segment is decoded to the start of the buffer */
623 + {
624 + dma_cat8(buffer, voice_ptr + VOICE_CATSRC_1);
625 + adpcm_decode_frames(samples, buffer, adpcm_table, u8_3d, u8_3f);
626 + }
627 +}
628 +/* Decode count ADPCM frames from src into dst, 32 output samples per frame, using the codebook table. */
629 +void adpcm_decode_frames(int16_t* dst, const uint8_t* src, const int16_t* table, uint8_t count, uint8_t skip_samples)
630 +{
631 + DebugMessage(M64MSG_VERBOSE, "ADPCM decode: count=%d, skip=%d", count, skip_samples);
632 +
633 + int16_t frame[32];
634 + const uint8_t* nibbles = src + 8; /* packed residual bytes are read through this pointer */
635 + unsigned i;
636 + bool jump_gap = false;
637 +
638 + if (skip_samples >= 32) /* start as if one frame had already been consumed */
639 + {
640 + jump_gap = true;
641 + nibbles += 16;
642 + src += 4;
643 + }
644 +
645 + for(i = 0; i < count; ++i)
646 + {
647 + uint8_t code = nibbles[0];
648 +
649 + const int16_t* book = (code & 0xf0) + table; /* high nibble, left in place, is the codebook offset */
650 + unsigned int rshift = (code & 0x0f); /* low nibble is the right-shift amount */
651 +
652 + adpcm_get_predicted_frame(frame, src, nibbles, rshift);
653 +
654 + memcpy(dst, frame, 2 * sizeof(frame[0])); /* first two samples are copied through unchanged */
655 + adpcm_decode_upto_8_samples(dst + 2, frame + 2, book, dst , 6);
656 + adpcm_decode_upto_8_samples(dst + 8, frame + 8, book, dst + 6, 8);
657 + adpcm_decode_upto_8_samples(dst + 16, frame + 16, book, dst + 14, 8);
658 + adpcm_decode_upto_8_samples(dst + 24, frame + 24, book, dst + 22, 8);
659 +
660 + if (jump_gap) /* every second frame takes this extra advance */
661 + {
662 + nibbles += 8;
663 + src += 32;
664 + }
665 +
666 + jump_gap = !jump_gap;
667 + nibbles += 16;
668 + src += 4;
669 + dst += 32;
670 + }
671 +}
672 +/* Isolate the masked bits of byte, shift them left by lshift, store as int16, then shift right by rshift. */
673 +int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask, unsigned lshift, unsigned rshift)
674 +{
675 + int16_t sample = ((uint16_t)byte & (uint16_t)mask) << lshift;
676 + sample >>= rshift; /* signed */
677 + return sample;
678 +}
679 +/* Fill dst with 32 values: two 16-bit samples built from src[0..3], then two per byte of nibbles[1..15]. */
680 +void adpcm_get_predicted_frame(int16_t* dst, const uint8_t* src, const uint8_t* nibbles, unsigned int rshift)
681 +{
682 + unsigned int i;
683 +
684 + *(dst++) = (src[0] << 8) | src[1]; /* high byte first */
685 + *(dst++) = (src[2] << 8) | src[3];
686 +
687 + for(i = 1; i < 16; ++i) /* nibbles[0] is the frame code and is skipped here */
688 + {
689 + uint8_t byte = nibbles[i];
690 +
691 + *(dst++) = adpcm_get_predicted_sample(byte, 0xf0, 8, rshift); /* high nibble */
692 + *(dst++) = adpcm_get_predicted_sample(byte, 0x0f, 12, rshift); /* low nibble */
693 + }
694 +}
695 +/* Produce size output samples from residuals src, codebook rows cb_entry[0..7] and cb_entry[8..15], and two previous samples. */
696 +void adpcm_decode_upto_8_samples(int16_t* dst, const int16_t* src,
697 + const int16_t* cb_entry, const int16_t* last_samples, size_t size)
698 +{
699 + const int16_t * const book1 = cb_entry;
700 + const int16_t * const book2 = cb_entry + 8;
701 +
702 + const int16_t l1 = last_samples[0];
703 + const int16_t l2 = last_samples[1];
704 +
705 + size_t i;
706 + int32_t accu;
707 +
708 + for(i = 0; i < size; ++i)
709 + {
710 + accu = (int32_t)src[i] << 11; /* residual scaled up by 11 bits to match the product terms */
711 + accu += book1[i] * l1 + book2[i] * l2 + rdot(i, book2, src); /* rdot folds in the i earlier residuals of this group */
712 + dst[i] = clamp_s16(accu >> 11);
713 + }
714 +}
715 +/* Resample one voice's samples and add them, scaled by four envelopes, into left/right/cc0/e50; stores the last 4 outputs. */
716 +void mix_voice_samples(musyx_t* musyx, uint32_t voice_ptr, const int16_t* samples, unsigned segbase, unsigned offset, uint32_t last_sample_ptr)
717 +{
718 + int i,k;
719 +
720 + /* parse VOICE structure */
721 + const uint16_t pitch_q16 = *dram_u16(voice_ptr + VOICE_PITCH_Q16);
722 + const uint16_t pitch_shift = *dram_u16(voice_ptr + VOICE_PITCH_SHIFT); // Q4.12
723 +
724 + const uint16_t end_point = *dram_u16(voice_ptr + VOICE_END_POINT);
725 + const uint16_t restart_point = *dram_u16(voice_ptr + VOICE_RESTART_POINT);
726 +
727 + const uint16_t u16_4e = *dram_u16(voice_ptr + VOICE_U16_4E);
728 +
729 + /* init values and pointers */
730 + const int16_t* sample = samples + segbase + offset + u16_4e;
731 + const int16_t* const sample_end = samples + segbase + end_point;
732 + const int16_t* const sample_restart = samples + (restart_point & 0x7fff) +
733 + (((restart_point & 0x8000) != 0) ? 0x000 : segbase); /* bit 15 set: index is relative to the buffer start, not segbase */
734 +
735 +
736 + uint32_t pitch_accu = pitch_q16;
737 + uint32_t pitch_step = pitch_shift << 4;
738 +
739 + int32_t v4_env[4];
740 + int32_t v4_env_step[4];
741 + int16_t* v4_dst[4];
742 + int16_t v4[4];
743 +
744 + load_u32((uint32_t*)v4_env, voice_ptr + VOICE_ENV_BEGIN, 4);
745 + load_u32((uint32_t*)v4_env_step, voice_ptr + VOICE_ENV_STEP, 4);
746 +
747 + v4_dst[0] = musyx->left;
748 + v4_dst[1] = musyx->right;
749 + v4_dst[2] = musyx->cc0;
750 + v4_dst[3] = musyx->e50;
751 +
752 + DebugMessage(M64MSG_VERBOSE,
753 + "Voice debug: segbase=%d"
754 + "\tu16_4e=%04x\n"
755 + "\tpitch: frac0=%04x shift=%04x\n"
756 + "\tend_point=%04x restart_point=%04x\n"
757 + "\tenv = %08x %08x %08x %08x\n"
758 + "\tenv_step = %08x %08x %08x %08x\n",
759 + segbase,
760 + u16_4e,
761 + pitch_q16, pitch_shift,
762 + end_point, restart_point,
763 + v4_env[0], v4_env[1], v4_env[2], v4_env[3],
764 + v4_env_step[0], v4_env_step[1], v4_env_step[2], v4_env_step[3]);
765 +
766 +
767 + for(i = 0; i < SUBFRAME_SIZE; ++i)
768 + {
769 + /* update sample and resample_lut pointers and then pitch_accu */
770 + sample += (pitch_accu >> 16); /* integer part of the accumulator advances the read position */
771 + const int16_t* lut = (int16_t*)(ResampleLUT + ((pitch_accu & 0xfc00) >> 8)); /* top 6 fraction bits pick a group of 4 coefficients */
772 + pitch_accu &= 0xffff;
773 + pitch_accu += pitch_step;
774 +
775 + /* handle end/restart points */
776 + int dist = sample - sample_end;
777 + if (dist >= 0) { sample = sample_restart + dist; } /* keep the overshoot past the end point */
778 +
779 + /* apply resample filter */
780 + int16_t v = clamp_s16(dot4(sample, lut));
781 +
782 + for(k = 0; k < 4; ++k)
783 + {
784 + /* envmix */
785 + int32_t accu = (v * (v4_env[k] >> 16)) >> 15; /* only the high 16 bits of the envelope scale the sample */
786 + v4[k] = clamp_s16(accu);
787 + *(v4_dst[k]) = clamp_s16(accu + *(v4_dst[k]));
788 +
789 + /* update envelopes and dst pointers */
790 + ++(v4_dst[k]);
791 + v4_env[k] += v4_env_step[k];
792 + }
793 + }
794 +
795 + /* save last resampled sample */
796 + store_u16((uint16_t*)v4, last_sample_ptr, 4);
797 +
798 + DebugMessage(M64MSG_VERBOSE, "last_sample = %04x %04x %04x %04x",
799 + v4[0], v4[1], v4[2], v4[3]);
800 +}
801 +
802 +/* Read the SFX parameter block at sfx_ptr and log it; returns immediately when sfx_ptr is 0. No effect is applied yet. */
803 +void sfx_stage(musyx_t* musyx, uint32_t sfx_ptr, uint16_t idx)
804 +{
805 + DebugMessage(M64MSG_VERBOSE, "SFX: %08x, idx=%d", sfx_ptr, idx);
806 +
807 + if (sfx_ptr == 0)
808 + return;
809 +
810 + /* load sfx parameters */
811 + uint32_t tap_delays[8];
812 + uint16_t tap_gains[8];
813 + uint16_t fir4_hcoeffs[4];
814 +
815 + uint32_t cbuffer_ptr = *dram_u32(sfx_ptr + SFX_CBUFFER_PTR);
816 + uint32_t cbuffer_length = *dram_u32(sfx_ptr + SFX_CBUFFER_LENGTH);
817 +
818 + uint16_t tap_count = *dram_u16(sfx_ptr + SFX_TAP_COUNT);
819 + load_u32(tap_delays, sfx_ptr + SFX_TAP_DELAYS, 8);
820 + load_u16(tap_gains, sfx_ptr + SFX_TAP_GAINS, 8);
821 +
822 + uint16_t fir4_hgain = *dram_u16(sfx_ptr + SFX_FIR4_HGAIN);
823 + load_u16(fir4_hcoeffs, sfx_ptr + SFX_FIR4_HCOEFFS, 4);
824 +
825 +
826 + DebugMessage(M64MSG_VERBOSE, "cbuffer: ptr=%08x length=%d", cbuffer_ptr, cbuffer_length);
827 +
828 + DebugMessage(M64MSG_VERBOSE, "fir4: hgain=%04x hcoeff=%04x %04x %04x %04x",
829 + fir4_hgain, fir4_hcoeffs[0], fir4_hcoeffs[1], fir4_hcoeffs[2], fir4_hcoeffs[3]);
830 +
831 + DebugMessage(M64MSG_VERBOSE, "tap count=%d", tap_count);
832 + // TODO: the parameters above are only logged; musyx, tap_delays and tap_gains are not used yet
833 +}
834 +/* Add the left/right base volumes to the left/right subframes and write them to DRAM at output_ptr as (L << 16) | R words. */
835 +void interleave_stage(musyx_t* musyx, uint32_t output_ptr)
836 +{
837 + DebugMessage(M64MSG_VERBOSE, "interleave: %08x", output_ptr);
838 +
839 + size_t i;
840 +
841 + int16_t base_left = clamp_s16(musyx->base_vol[0]);
842 + int16_t base_right = clamp_s16(musyx->base_vol[1]);
843 +
844 + int16_t* left = musyx->left;
845 + int16_t* right = musyx->right;
846 + uint32_t* dst = dram_u32(output_ptr);
847 +
848 + for(i = 0; i < SUBFRAME_SIZE; ++i)
849 + {
850 + uint16_t l = clamp_s16(*(left++) + base_left );
851 + uint16_t r = clamp_s16(*(right++) + base_right);
852 + /* widen first: a uint16_t promotes to int, so l << 16 would overflow for l >= 0x8000 */
853 + *(dst++) = ((uint32_t)l << 16) | r;
854 + }
855 +}
856 +
857 diff --git a/src/musyx.h b/src/musyx.h
858 new file mode 100644
859 index 0000000000000000000000000000000000000000..c3bf5aee1bbec643567126a09c365d5613b1e7b8
860 --- /dev/null
861 +++ b/src/musyx.h
862 @@ -0,0 +1,28 @@
863 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
864 + * Mupen64plus-rsp-hle - musyx.h *
865 + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
866 + * Copyright (C) 2013 Bobby Smiles *
867 + * *
868 + * This program is free software; you can redistribute it and/or modify *
869 + * it under the terms of the GNU General Public License as published by *
870 + * the Free Software Foundation; either version 2 of the License, or *
871 + * (at your option) any later version. *
872 + * *
873 + * This program is distributed in the hope that it will be useful, *
874 + * but WITHOUT ANY WARRANTY; without even the implied warranty of *
875 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
876 + * GNU General Public License for more details. *
877 + * *
878 + * You should have received a copy of the GNU General Public License *
879 + * along with this program; if not, write to the *
880 + * Free Software Foundation, Inc., *
881 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
882 + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
883 +
884 +#ifndef MUSYX_H
885 +#define MUSYX_H
886 +/* Entry point of the MusyX audio ucode; takes no arguments and reads the current task itself. */
887 +void musyx_task();
888 +
889 +#endif
890 +
11 jpeg_memory_alloc_reduce.patch
22 jpeg_memory_alloc_stack.patch
33 jpeg_memory_alloc_refactor.patch
4 musyx_initial.patch