diff options
author | JP Cimalando <jpcima@users.noreply.github.com> | 2019-02-06 22:22:03 +0100 |
---|---|---|
committer | JP Cimalando <jpcima@users.noreply.github.com> | 2019-02-06 23:02:25 +0100 |
commit | f5f850325b6aacb44d9f07dac53e55f2708833ba (patch) | |
tree | c9bb52a1670e58b362a850e3727f3927399086af | |
parent | 307678d1831434b9565c82960aae3b97d37f71df (diff) | |
download | libADLMIDI-f5f850325b6aacb44d9f07dac53e55f2708833ba.tar.gz libADLMIDI-f5f850325b6aacb44d9f07dac53e55f2708833ba.tar.bz2 libADLMIDI-f5f850325b6aacb44d9f07dac53e55f2708833ba.zip |
Nuked OPL 1.8 optimizations port
-rw-r--r-- | src/chips/nuked/nukedopl3.c | 224 | ||||
-rw-r--r-- | src/chips/nuked/nukedopl3.h | 9 |
2 files changed, 229 insertions, 4 deletions
diff --git a/src/chips/nuked/nukedopl3.c b/src/chips/nuked/nukedopl3.c index 267e67a..ef8291a 100644 --- a/src/chips/nuked/nukedopl3.c +++ b/src/chips/nuked/nukedopl3.c @@ -53,6 +53,117 @@ enum { }; +#if OPL_FAST_WAVEGEN +/* + * logsin table + */ + +static const Bit16u logsinrom[512] = { + 0x859, 0x6c3, 0x607, 0x58b, 0x52e, 0x4e4, 0x4a6, 0x471, + 0x443, 0x41a, 0x3f5, 0x3d3, 0x3b5, 0x398, 0x37e, 0x365, + 0x34e, 0x339, 0x324, 0x311, 0x2ff, 0x2ed, 0x2dc, 0x2cd, + 0x2bd, 0x2af, 0x2a0, 0x293, 0x286, 0x279, 0x26d, 0x261, + 0x256, 0x24b, 0x240, 0x236, 0x22c, 0x222, 0x218, 0x20f, + 0x206, 0x1fd, 0x1f5, 0x1ec, 0x1e4, 0x1dc, 0x1d4, 0x1cd, + 0x1c5, 0x1be, 0x1b7, 0x1b0, 0x1a9, 0x1a2, 0x19b, 0x195, + 0x18f, 0x188, 0x182, 0x17c, 0x177, 0x171, 0x16b, 0x166, + 0x160, 0x15b, 0x155, 0x150, 0x14b, 0x146, 0x141, 0x13c, + 0x137, 0x133, 0x12e, 0x129, 0x125, 0x121, 0x11c, 0x118, + 0x114, 0x10f, 0x10b, 0x107, 0x103, 0x0ff, 0x0fb, 0x0f8, + 0x0f4, 0x0f0, 0x0ec, 0x0e9, 0x0e5, 0x0e2, 0x0de, 0x0db, + 0x0d7, 0x0d4, 0x0d1, 0x0cd, 0x0ca, 0x0c7, 0x0c4, 0x0c1, + 0x0be, 0x0bb, 0x0b8, 0x0b5, 0x0b2, 0x0af, 0x0ac, 0x0a9, + 0x0a7, 0x0a4, 0x0a1, 0x09f, 0x09c, 0x099, 0x097, 0x094, + 0x092, 0x08f, 0x08d, 0x08a, 0x088, 0x086, 0x083, 0x081, + 0x07f, 0x07d, 0x07a, 0x078, 0x076, 0x074, 0x072, 0x070, + 0x06e, 0x06c, 0x06a, 0x068, 0x066, 0x064, 0x062, 0x060, + 0x05e, 0x05c, 0x05b, 0x059, 0x057, 0x055, 0x053, 0x052, + 0x050, 0x04e, 0x04d, 0x04b, 0x04a, 0x048, 0x046, 0x045, + 0x043, 0x042, 0x040, 0x03f, 0x03e, 0x03c, 0x03b, 0x039, + 0x038, 0x037, 0x035, 0x034, 0x033, 0x031, 0x030, 0x02f, + 0x02e, 0x02d, 0x02b, 0x02a, 0x029, 0x028, 0x027, 0x026, + 0x025, 0x024, 0x023, 0x022, 0x021, 0x020, 0x01f, 0x01e, + 0x01d, 0x01c, 0x01b, 0x01a, 0x019, 0x018, 0x017, 0x017, + 0x016, 0x015, 0x014, 0x014, 0x013, 0x012, 0x011, 0x011, + 0x010, 0x00f, 0x00f, 0x00e, 0x00d, 0x00d, 0x00c, 0x00c, + 0x00b, 0x00a, 0x00a, 0x009, 0x009, 0x008, 0x008, 0x007, + 0x007, 0x007, 0x006, 0x006, 0x005, 0x005, 0x005, 0x004, + 0x004, 0x004, 0x003, 0x003, 0x003, 0x002, 0x002, 0x002, + 0x002, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, + 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, + 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x002, + 0x002, 0x002, 0x002, 0x003, 0x003, 0x003, 0x004, 0x004, + 0x004, 0x005, 0x005, 0x005, 0x006, 0x006, 0x007, 0x007, + 0x007, 0x008, 0x008, 0x009, 0x009, 0x00a, 0x00a, 0x00b, + 0x00c, 0x00c, 0x00d, 0x00d, 0x00e, 0x00f, 0x00f, 0x010, + 0x011, 0x011, 0x012, 0x013, 0x014, 0x014, 0x015, 0x016, + 0x017, 0x017, 0x018, 0x019, 0x01a, 0x01b, 0x01c, 0x01d, + 0x01e, 0x01f, 0x020, 0x021, 0x022, 0x023, 0x024, 0x025, + 0x026, 0x027, 0x028, 0x029, 0x02a, 0x02b, 0x02d, 0x02e, + 0x02f, 0x030, 0x031, 0x033, 0x034, 0x035, 0x037, 0x038, + 0x039, 0x03b, 0x03c, 0x03e, 0x03f, 0x040, 0x042, 0x043, + 0x045, 0x046, 0x048, 0x04a, 0x04b, 0x04d, 0x04e, 0x050, + 0x052, 0x053, 0x055, 0x057, 0x059, 0x05b, 0x05c, 0x05e, + 0x060, 0x062, 0x064, 0x066, 0x068, 0x06a, 0x06c, 0x06e, + 0x070, 0x072, 0x074, 0x076, 0x078, 0x07a, 0x07d, 0x07f, + 0x081, 0x083, 0x086, 0x088, 0x08a, 0x08d, 0x08f, 0x092, + 0x094, 0x097, 0x099, 0x09c, 0x09f, 0x0a1, 0x0a4, 0x0a7, + 0x0a9, 0x0ac, 0x0af, 0x0b2, 0x0b5, 0x0b8, 0x0bb, 0x0be, + 0x0c1, 0x0c4, 0x0c7, 0x0ca, 0x0cd, 0x0d1, 0x0d4, 0x0d7, + 0x0db, 0x0de, 0x0e2, 0x0e5, 0x0e9, 0x0ec, 0x0f0, 0x0f4, + 0x0f8, 0x0fb, 0x0ff, 0x103, 0x107, 0x10b, 0x10f, 0x114, + 0x118, 0x11c, 0x121, 0x125, 0x129, 0x12e, 0x133, 0x137, + 0x13c, 0x141, 0x146, 0x14b, 0x150, 0x155, 0x15b, 0x160, + 0x166, 0x16b, 0x171, 0x177, 0x17c, 0x182, 0x188, 0x18f, + 0x195, 0x19b, 0x1a2, 0x1a9, 0x1b0, 0x1b7, 0x1be, 0x1c5, + 0x1cd, 0x1d4, 0x1dc, 0x1e4, 0x1ec, 0x1f5, 0x1fd, 0x206, + 0x20f, 0x218, 0x222, 0x22c, 0x236, 0x240, 0x24b, 0x256, + 0x261, 0x26d, 0x279, 0x286, 0x293, 0x2a0, 0x2af, 0x2bd, + 0x2cd, 0x2dc, 0x2ed, 0x2ff, 0x311, 0x324, 0x339, 0x34e, + 0x365, 0x37e, 0x398, 0x3b5, 0x3d3, 0x3f5, 0x41a, 0x443, + 0x471, 0x4a6, 0x4e4, 0x52e, 0x58b, 0x607, 0x6c3, 0x859 +}; + +/* + * exp table + */ + +static const Bit16u exprom[256] = { + 0xff4, 0xfea, 0xfde, 0xfd4, 0xfc8, 0xfbe, 0xfb4, 0xfa8, + 0xf9e, 0xf92, 0xf88, 0xf7e, 0xf72, 0xf68, 0xf5c, 0xf52, + 0xf48, 0xf3e, 0xf32, 0xf28, 0xf1e, 0xf14, 0xf08, 0xefe, + 0xef4, 0xeea, 0xee0, 0xed4, 0xeca, 0xec0, 0xeb6, 0xeac, + 0xea2, 0xe98, 0xe8e, 0xe84, 0xe7a, 0xe70, 0xe66, 0xe5c, + 0xe52, 0xe48, 0xe3e, 0xe34, 0xe2a, 0xe20, 0xe16, 0xe0c, + 0xe04, 0xdfa, 0xdf0, 0xde6, 0xddc, 0xdd2, 0xdca, 0xdc0, + 0xdb6, 0xdac, 0xda4, 0xd9a, 0xd90, 0xd88, 0xd7e, 0xd74, + 0xd6a, 0xd62, 0xd58, 0xd50, 0xd46, 0xd3c, 0xd34, 0xd2a, + 0xd22, 0xd18, 0xd10, 0xd06, 0xcfe, 0xcf4, 0xcec, 0xce2, + 0xcda, 0xcd0, 0xcc8, 0xcbe, 0xcb6, 0xcae, 0xca4, 0xc9c, + 0xc92, 0xc8a, 0xc82, 0xc78, 0xc70, 0xc68, 0xc60, 0xc56, + 0xc4e, 0xc46, 0xc3c, 0xc34, 0xc2c, 0xc24, 0xc1c, 0xc12, + 0xc0a, 0xc02, 0xbfa, 0xbf2, 0xbea, 0xbe0, 0xbd8, 0xbd0, + 0xbc8, 0xbc0, 0xbb8, 0xbb0, 0xba8, 0xba0, 0xb98, 0xb90, + 0xb88, 0xb80, 0xb78, 0xb70, 0xb68, 0xb60, 0xb58, 0xb50, + 0xb48, 0xb40, 0xb38, 0xb32, 0xb2a, 0xb22, 0xb1a, 0xb12, + 0xb0a, 0xb02, 0xafc, 0xaf4, 0xaec, 0xae4, 0xade, 0xad6, + 0xace, 0xac6, 0xac0, 0xab8, 0xab0, 0xaa8, 0xaa2, 0xa9a, + 0xa92, 0xa8c, 0xa84, 0xa7c, 0xa76, 0xa6e, 0xa68, 0xa60, + 0xa58, 0xa52, 0xa4a, 0xa44, 0xa3c, 0xa36, 0xa2e, 0xa28, + 0xa20, 0xa18, 0xa12, 0xa0c, 0xa04, 0x9fe, 0x9f6, 0x9f0, + 0x9e8, 0x9e2, 0x9da, 0x9d4, 0x9ce, 0x9c6, 0x9c0, 0x9b8, + 0x9b2, 0x9ac, 0x9a4, 0x99e, 0x998, 0x990, 0x98a, 0x984, + 0x97c, 0x976, 0x970, 0x96a, 0x962, 0x95c, 0x956, 0x950, + 0x948, 0x942, 0x93c, 0x936, 0x930, 0x928, 0x922, 0x91c, + 0x916, 0x910, 0x90a, 0x904, 0x8fc, 0x8f6, 0x8f0, 0x8ea, + 0x8e4, 0x8de, 0x8d8, 0x8d2, 0x8cc, 0x8c6, 0x8c0, 0x8ba, + 0x8b4, 0x8ae, 0x8a8, 0x8a2, 0x89c, 0x896, 0x890, 0x88a, + 0x884, 0x87e, 0x878, 0x872, 0x86c, 0x866, 0x860, 0x85a, + 0x854, 0x850, 0x84a, 0x844, 0x83e, 0x838, 0x832, 0x82c, + 0x828, 0x822, 0x81c, 0x816, 0x810, 0x80c, 0x806, 0x800 +}; +#else /* * logsin table */ @@ -130,6 +241,7 @@ static const Bit16u exprom[256] = { 0x42a, 0x428, 0x425, 0x422, 0x41f, 0x41c, 0x419, 0x416, 0x414, 0x411, 0x40e, 0x40b, 0x408, 0x406, 0x403, 0x400 }; +#endif /* * freq mult table multiplied by 2 @@ -207,8 +319,8 @@ static const Bit16u panlawtable[] = * Envelope generator */ +#if !OPL_FAST_WAVEGEN typedef Bit16s(*envelope_sinfunc)(Bit16u phase, Bit16u envelope); -typedef void(*envelope_genfunc)(opl3_slot *slott); static Bit16s OPL3_EnvelopeCalcExp(Bit32u level) { @@ -366,6 +478,7 @@ static const envelope_sinfunc envelope_sin[8] = { OPL3_EnvelopeCalcSin6, OPL3_EnvelopeCalcSin7 }; +#endif enum envelope_gen_num { @@ -401,6 +514,15 @@ static void OPL3_EnvelopeCalc(opl3_slot *slot) Bit8u reset = 0; slot->eg_out = slot->eg_rout + (slot->reg_tl << 2) + (slot->eg_ksl >> kslshift[slot->reg_ksl]) + *slot->trem; + +#if OPL_FAST_WAVEGEN + if (slot->eg_out > 0x1ff) + { + slot->eg_out = 0x1ff; + } + slot->eg_out <<= 3; +#endif + if (slot->key && slot->eg_gen == envelope_gen_num_release) { reset = 1; @@ -687,12 +809,92 @@ static void OPL3_SlotWriteE0(opl3_slot *slot, Bit8u data) { slot->reg_wf &= 0x03; } + +#if OPL_FAST_WAVEGEN + switch (slot->reg_wf) + { + case 1: + case 4: + case 5: + slot->maskzero = 0x200; + break; + case 3: + slot->maskzero = 0x100; + break; + default: + slot->maskzero = 0; + break; + } + + switch (slot->reg_wf) + { + case 4: + slot->signpos = (31-8); /* sigext of (phase & 0x100) */ + break; + case 0: + case 6: + case 7: + slot->signpos = (31-9); /* sigext of (phase & 0x200) */ + break; + default: + slot->signpos = (31-16); /* set "neg" to zero */ + break; + } + + switch (slot->reg_wf) + { + case 4: + case 5: + slot->phaseshift = 1; + break; + case 6: + slot->phaseshift = 16; /* set phase to zero and flag for non-sin wave */ + break; + case 7: + slot->phaseshift = 32; /* no shift (work by mod 32), but flag for non-sin wave */ + break; + default: + slot->phaseshift = 0; + break; + } +#endif } +#if OPL_FAST_WAVEGEN +static void OPL3_SlotGenerate(opl3_slot *slot) +{ + Bit16u phase = slot->pg_phase_out + *slot->mod; + Bit32u neg, level; + Bit8u phaseshift; + + /* Fast paths for mute segments */ + if (phase & slot->maskzero) + { + slot->out = 0; + return; + } + + neg = (Bit32s)((Bit32u)phase << slot->signpos) >> 31; + phaseshift = slot->phaseshift; + level = slot->eg_out; + + phase <<= phaseshift; + if (phaseshift <= 1) + { + level += logsinrom[phase & 0x1ff]; + } + else + { + level += ((phase ^ neg) & 0x3ff) << 3; + } + slot->out = exprom[level & 0xff] >> (level >> 8) ^ neg; +} +#else static void OPL3_SlotGenerate(opl3_slot *slot) { slot->out = envelope_sin[slot->reg_wf](slot->pg_phase_out + *slot->mod, slot->eg_out); } +#endif static void OPL3_SlotCalcFB(opl3_slot *slot) { @@ -1176,7 +1378,7 @@ void OPL3_Generate(opl3_chip *chip, Bit16s *buf) if (chip->eg_timerrem || chip->eg_state) { - if (chip->eg_timer == (uint64_t)0xfffffffffU) + if (chip->eg_timer == 0xfffffffffULL) { chip->eg_timer = 0; chip->eg_timerrem = 1; @@ -1231,10 +1433,17 @@ void OPL3_Reset(opl3_chip *chip, Bit32u samplerate) chip->slot[slotnum].chip = chip; chip->slot[slotnum].mod = &chip->zeromod; chip->slot[slotnum].eg_rout = 0x1ff; +#if OPL_FAST_WAVEGEN + chip->slot[slotnum].eg_out = 0x1ff << 3; +#else chip->slot[slotnum].eg_out = 0x1ff; +#endif chip->slot[slotnum].eg_gen = envelope_gen_num_release; chip->slot[slotnum].trem = (Bit8u*)&chip->zeromod; chip->slot[slotnum].slot_num = slotnum; +#if OPL_FAST_WAVEGEN + chip->slot[slotnum].signpos = (31-9); /* for wf=0 need use sigext of (phase & 0x200) */ +#endif } for (channum = 0; channum < 18; channum++) { @@ -1420,16 +1629,23 @@ void OPL3_GenerateStream(opl3_chip *chip, Bit16s *sndptr, Bit32u numsamples) } } +#define OPL3_MIN(A, B) (((A) > (B)) ? (B) : (A)) +#define OPL3_MAX(A, B) (((A) < (B)) ? (B) : (A)) +#define OPL3_CLAMP(V, MIN, MAX) OPL3_MAX(OPL3_MIN(V, MAX), MIN) + void OPL3_GenerateStreamMix(opl3_chip *chip, Bit16s *sndptr, Bit32u numsamples) { Bit32u i; Bit16s sample[2]; + Bit32s mix[2]; for(i = 0; i < numsamples; i++) { OPL3_GenerateResampled(chip, sample); - sndptr[0] += sample[0]; - sndptr[1] += sample[1]; + mix[0] = sndptr[0] + sample[0]; + mix[1] = sndptr[1] + sample[1]; + sndptr[0] = OPL3_CLAMP(mix[0], INT16_MIN, INT16_MAX); + sndptr[1] = OPL3_CLAMP(mix[1], INT16_MIN, INT16_MAX); sndptr += 2; } } diff --git a/src/chips/nuked/nukedopl3.h b/src/chips/nuked/nukedopl3.h index 268e8de..9849569 100644 --- a/src/chips/nuked/nukedopl3.h +++ b/src/chips/nuked/nukedopl3.h @@ -33,13 +33,16 @@ #define OPL_OPL3_H #include <inttypes.h> +#include <stdint.h> #ifdef __cplusplus extern "C" { #endif + #define OPL_WRITEBUF_SIZE 1024 #define OPL_WRITEBUF_DELAY 2 +#define OPL_FAST_WAVEGEN 1 /* optimized waveform generation */ typedef uintptr_t Bitu; typedef intptr_t Bits; @@ -86,6 +89,12 @@ struct _opl3_slot { Bit32u pg_phase; Bit16u pg_phase_out; Bit8u slot_num; + +#if OPL_FAST_WAVEGEN + Bit16u maskzero; + Bit8u signpos; + Bit8u phaseshift; +#endif }; struct _opl3_channel { |