aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: JP Cimalando <jpcima@users.noreply.github.com> 2019-02-06 22:22:03 +0100
committer: JP Cimalando <jpcima@users.noreply.github.com> 2019-02-06 23:02:25 +0100
commit: f5f850325b6aacb44d9f07dac53e55f2708833ba (patch)
tree: c9bb52a1670e58b362a850e3727f3927399086af
parent: 307678d1831434b9565c82960aae3b97d37f71df (diff)
download: libADLMIDI-f5f850325b6aacb44d9f07dac53e55f2708833ba.tar.gz
libADLMIDI-f5f850325b6aacb44d9f07dac53e55f2708833ba.tar.bz2
libADLMIDI-f5f850325b6aacb44d9f07dac53e55f2708833ba.zip
Nuked OPL 1.8 optimizations port
-rw-r--r-- src/chips/nuked/nukedopl3.c | 224
-rw-r--r-- src/chips/nuked/nukedopl3.h | 9
2 files changed, 229 insertions, 4 deletions
diff --git a/src/chips/nuked/nukedopl3.c b/src/chips/nuked/nukedopl3.c
index 267e67a..ef8291a 100644
--- a/src/chips/nuked/nukedopl3.c
+++ b/src/chips/nuked/nukedopl3.c
@@ -53,6 +53,117 @@ enum {
};
+#if OPL_FAST_WAVEGEN
+/*
+ * logsin table
+ *
+ * Variant used when OPL_FAST_WAVEGEN is enabled. It holds 512 entries and is
+ * indexed with (phase & 0x1ff) by the fast OPL3_SlotGenerate. The second 256
+ * entries are the first 256 in reverse order (entry i equals entry 511 - i),
+ * so the lookup needs no separate mirroring step.
+ */
+
+static const Bit16u logsinrom[512] = {
+ 0x859, 0x6c3, 0x607, 0x58b, 0x52e, 0x4e4, 0x4a6, 0x471,
+ 0x443, 0x41a, 0x3f5, 0x3d3, 0x3b5, 0x398, 0x37e, 0x365,
+ 0x34e, 0x339, 0x324, 0x311, 0x2ff, 0x2ed, 0x2dc, 0x2cd,
+ 0x2bd, 0x2af, 0x2a0, 0x293, 0x286, 0x279, 0x26d, 0x261,
+ 0x256, 0x24b, 0x240, 0x236, 0x22c, 0x222, 0x218, 0x20f,
+ 0x206, 0x1fd, 0x1f5, 0x1ec, 0x1e4, 0x1dc, 0x1d4, 0x1cd,
+ 0x1c5, 0x1be, 0x1b7, 0x1b0, 0x1a9, 0x1a2, 0x19b, 0x195,
+ 0x18f, 0x188, 0x182, 0x17c, 0x177, 0x171, 0x16b, 0x166,
+ 0x160, 0x15b, 0x155, 0x150, 0x14b, 0x146, 0x141, 0x13c,
+ 0x137, 0x133, 0x12e, 0x129, 0x125, 0x121, 0x11c, 0x118,
+ 0x114, 0x10f, 0x10b, 0x107, 0x103, 0x0ff, 0x0fb, 0x0f8,
+ 0x0f4, 0x0f0, 0x0ec, 0x0e9, 0x0e5, 0x0e2, 0x0de, 0x0db,
+ 0x0d7, 0x0d4, 0x0d1, 0x0cd, 0x0ca, 0x0c7, 0x0c4, 0x0c1,
+ 0x0be, 0x0bb, 0x0b8, 0x0b5, 0x0b2, 0x0af, 0x0ac, 0x0a9,
+ 0x0a7, 0x0a4, 0x0a1, 0x09f, 0x09c, 0x099, 0x097, 0x094,
+ 0x092, 0x08f, 0x08d, 0x08a, 0x088, 0x086, 0x083, 0x081,
+ 0x07f, 0x07d, 0x07a, 0x078, 0x076, 0x074, 0x072, 0x070,
+ 0x06e, 0x06c, 0x06a, 0x068, 0x066, 0x064, 0x062, 0x060,
+ 0x05e, 0x05c, 0x05b, 0x059, 0x057, 0x055, 0x053, 0x052,
+ 0x050, 0x04e, 0x04d, 0x04b, 0x04a, 0x048, 0x046, 0x045,
+ 0x043, 0x042, 0x040, 0x03f, 0x03e, 0x03c, 0x03b, 0x039,
+ 0x038, 0x037, 0x035, 0x034, 0x033, 0x031, 0x030, 0x02f,
+ 0x02e, 0x02d, 0x02b, 0x02a, 0x029, 0x028, 0x027, 0x026,
+ 0x025, 0x024, 0x023, 0x022, 0x021, 0x020, 0x01f, 0x01e,
+ 0x01d, 0x01c, 0x01b, 0x01a, 0x019, 0x018, 0x017, 0x017,
+ 0x016, 0x015, 0x014, 0x014, 0x013, 0x012, 0x011, 0x011,
+ 0x010, 0x00f, 0x00f, 0x00e, 0x00d, 0x00d, 0x00c, 0x00c,
+ 0x00b, 0x00a, 0x00a, 0x009, 0x009, 0x008, 0x008, 0x007,
+ 0x007, 0x007, 0x006, 0x006, 0x005, 0x005, 0x005, 0x004,
+ 0x004, 0x004, 0x003, 0x003, 0x003, 0x002, 0x002, 0x002,
+ 0x002, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001,
+ 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+ 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+ 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x002,
+ 0x002, 0x002, 0x002, 0x003, 0x003, 0x003, 0x004, 0x004,
+ 0x004, 0x005, 0x005, 0x005, 0x006, 0x006, 0x007, 0x007,
+ 0x007, 0x008, 0x008, 0x009, 0x009, 0x00a, 0x00a, 0x00b,
+ 0x00c, 0x00c, 0x00d, 0x00d, 0x00e, 0x00f, 0x00f, 0x010,
+ 0x011, 0x011, 0x012, 0x013, 0x014, 0x014, 0x015, 0x016,
+ 0x017, 0x017, 0x018, 0x019, 0x01a, 0x01b, 0x01c, 0x01d,
+ 0x01e, 0x01f, 0x020, 0x021, 0x022, 0x023, 0x024, 0x025,
+ 0x026, 0x027, 0x028, 0x029, 0x02a, 0x02b, 0x02d, 0x02e,
+ 0x02f, 0x030, 0x031, 0x033, 0x034, 0x035, 0x037, 0x038,
+ 0x039, 0x03b, 0x03c, 0x03e, 0x03f, 0x040, 0x042, 0x043,
+ 0x045, 0x046, 0x048, 0x04a, 0x04b, 0x04d, 0x04e, 0x050,
+ 0x052, 0x053, 0x055, 0x057, 0x059, 0x05b, 0x05c, 0x05e,
+ 0x060, 0x062, 0x064, 0x066, 0x068, 0x06a, 0x06c, 0x06e,
+ 0x070, 0x072, 0x074, 0x076, 0x078, 0x07a, 0x07d, 0x07f,
+ 0x081, 0x083, 0x086, 0x088, 0x08a, 0x08d, 0x08f, 0x092,
+ 0x094, 0x097, 0x099, 0x09c, 0x09f, 0x0a1, 0x0a4, 0x0a7,
+ 0x0a9, 0x0ac, 0x0af, 0x0b2, 0x0b5, 0x0b8, 0x0bb, 0x0be,
+ 0x0c1, 0x0c4, 0x0c7, 0x0ca, 0x0cd, 0x0d1, 0x0d4, 0x0d7,
+ 0x0db, 0x0de, 0x0e2, 0x0e5, 0x0e9, 0x0ec, 0x0f0, 0x0f4,
+ 0x0f8, 0x0fb, 0x0ff, 0x103, 0x107, 0x10b, 0x10f, 0x114,
+ 0x118, 0x11c, 0x121, 0x125, 0x129, 0x12e, 0x133, 0x137,
+ 0x13c, 0x141, 0x146, 0x14b, 0x150, 0x155, 0x15b, 0x160,
+ 0x166, 0x16b, 0x171, 0x177, 0x17c, 0x182, 0x188, 0x18f,
+ 0x195, 0x19b, 0x1a2, 0x1a9, 0x1b0, 0x1b7, 0x1be, 0x1c5,
+ 0x1cd, 0x1d4, 0x1dc, 0x1e4, 0x1ec, 0x1f5, 0x1fd, 0x206,
+ 0x20f, 0x218, 0x222, 0x22c, 0x236, 0x240, 0x24b, 0x256,
+ 0x261, 0x26d, 0x279, 0x286, 0x293, 0x2a0, 0x2af, 0x2bd,
+ 0x2cd, 0x2dc, 0x2ed, 0x2ff, 0x311, 0x324, 0x339, 0x34e,
+ 0x365, 0x37e, 0x398, 0x3b5, 0x3d3, 0x3f5, 0x41a, 0x443,
+ 0x471, 0x4a6, 0x4e4, 0x52e, 0x58b, 0x607, 0x6c3, 0x859
+};
+
+/*
+ * exp table
+ *
+ * Variant used when OPL_FAST_WAVEGEN is enabled. It holds 256 entries and is
+ * indexed with (level & 0xff) by the fast OPL3_SlotGenerate, which then
+ * shifts the entry right by (level >> 8).
+ * NOTE(review): the last two entries (0x806, 0x800) are exactly twice the
+ * last two entries of the reference table in the #else branch (0x403, 0x400);
+ * presumably every entry is the reference value pre-shifted left by one so
+ * the generator can skip that shift - confirm against the full table.
+ */
+
+static const Bit16u exprom[256] = {
+ 0xff4, 0xfea, 0xfde, 0xfd4, 0xfc8, 0xfbe, 0xfb4, 0xfa8,
+ 0xf9e, 0xf92, 0xf88, 0xf7e, 0xf72, 0xf68, 0xf5c, 0xf52,
+ 0xf48, 0xf3e, 0xf32, 0xf28, 0xf1e, 0xf14, 0xf08, 0xefe,
+ 0xef4, 0xeea, 0xee0, 0xed4, 0xeca, 0xec0, 0xeb6, 0xeac,
+ 0xea2, 0xe98, 0xe8e, 0xe84, 0xe7a, 0xe70, 0xe66, 0xe5c,
+ 0xe52, 0xe48, 0xe3e, 0xe34, 0xe2a, 0xe20, 0xe16, 0xe0c,
+ 0xe04, 0xdfa, 0xdf0, 0xde6, 0xddc, 0xdd2, 0xdca, 0xdc0,
+ 0xdb6, 0xdac, 0xda4, 0xd9a, 0xd90, 0xd88, 0xd7e, 0xd74,
+ 0xd6a, 0xd62, 0xd58, 0xd50, 0xd46, 0xd3c, 0xd34, 0xd2a,
+ 0xd22, 0xd18, 0xd10, 0xd06, 0xcfe, 0xcf4, 0xcec, 0xce2,
+ 0xcda, 0xcd0, 0xcc8, 0xcbe, 0xcb6, 0xcae, 0xca4, 0xc9c,
+ 0xc92, 0xc8a, 0xc82, 0xc78, 0xc70, 0xc68, 0xc60, 0xc56,
+ 0xc4e, 0xc46, 0xc3c, 0xc34, 0xc2c, 0xc24, 0xc1c, 0xc12,
+ 0xc0a, 0xc02, 0xbfa, 0xbf2, 0xbea, 0xbe0, 0xbd8, 0xbd0,
+ 0xbc8, 0xbc0, 0xbb8, 0xbb0, 0xba8, 0xba0, 0xb98, 0xb90,
+ 0xb88, 0xb80, 0xb78, 0xb70, 0xb68, 0xb60, 0xb58, 0xb50,
+ 0xb48, 0xb40, 0xb38, 0xb32, 0xb2a, 0xb22, 0xb1a, 0xb12,
+ 0xb0a, 0xb02, 0xafc, 0xaf4, 0xaec, 0xae4, 0xade, 0xad6,
+ 0xace, 0xac6, 0xac0, 0xab8, 0xab0, 0xaa8, 0xaa2, 0xa9a,
+ 0xa92, 0xa8c, 0xa84, 0xa7c, 0xa76, 0xa6e, 0xa68, 0xa60,
+ 0xa58, 0xa52, 0xa4a, 0xa44, 0xa3c, 0xa36, 0xa2e, 0xa28,
+ 0xa20, 0xa18, 0xa12, 0xa0c, 0xa04, 0x9fe, 0x9f6, 0x9f0,
+ 0x9e8, 0x9e2, 0x9da, 0x9d4, 0x9ce, 0x9c6, 0x9c0, 0x9b8,
+ 0x9b2, 0x9ac, 0x9a4, 0x99e, 0x998, 0x990, 0x98a, 0x984,
+ 0x97c, 0x976, 0x970, 0x96a, 0x962, 0x95c, 0x956, 0x950,
+ 0x948, 0x942, 0x93c, 0x936, 0x930, 0x928, 0x922, 0x91c,
+ 0x916, 0x910, 0x90a, 0x904, 0x8fc, 0x8f6, 0x8f0, 0x8ea,
+ 0x8e4, 0x8de, 0x8d8, 0x8d2, 0x8cc, 0x8c6, 0x8c0, 0x8ba,
+ 0x8b4, 0x8ae, 0x8a8, 0x8a2, 0x89c, 0x896, 0x890, 0x88a,
+ 0x884, 0x87e, 0x878, 0x872, 0x86c, 0x866, 0x860, 0x85a,
+ 0x854, 0x850, 0x84a, 0x844, 0x83e, 0x838, 0x832, 0x82c,
+ 0x828, 0x822, 0x81c, 0x816, 0x810, 0x80c, 0x806, 0x800
+};
+#else
/*
* logsin table
*/
@@ -130,6 +241,7 @@ static const Bit16u exprom[256] = {
0x42a, 0x428, 0x425, 0x422, 0x41f, 0x41c, 0x419, 0x416,
0x414, 0x411, 0x40e, 0x40b, 0x408, 0x406, 0x403, 0x400
};
+#endif
/*
* freq mult table multiplied by 2
@@ -207,8 +319,8 @@ static const Bit16u panlawtable[] =
* Envelope generator
*/
+#if !OPL_FAST_WAVEGEN
typedef Bit16s(*envelope_sinfunc)(Bit16u phase, Bit16u envelope);
-typedef void(*envelope_genfunc)(opl3_slot *slott);
static Bit16s OPL3_EnvelopeCalcExp(Bit32u level)
{
@@ -366,6 +478,7 @@ static const envelope_sinfunc envelope_sin[8] = {
OPL3_EnvelopeCalcSin6,
OPL3_EnvelopeCalcSin7
};
+#endif
enum envelope_gen_num
{
@@ -401,6 +514,15 @@ static void OPL3_EnvelopeCalc(opl3_slot *slot)
Bit8u reset = 0;
slot->eg_out = slot->eg_rout + (slot->reg_tl << 2)
+ (slot->eg_ksl >> kslshift[slot->reg_ksl]) + *slot->trem;
+
+#if OPL_FAST_WAVEGEN
+ if (slot->eg_out > 0x1ff)
+ {
+ slot->eg_out = 0x1ff;
+ }
+ slot->eg_out <<= 3;
+#endif
+
if (slot->key && slot->eg_gen == envelope_gen_num_release)
{
reset = 1;
@@ -687,12 +809,92 @@ static void OPL3_SlotWriteE0(opl3_slot *slot, Bit8u data)
{
slot->reg_wf &= 0x03;
}
+
+#if OPL_FAST_WAVEGEN
+ switch (slot->reg_wf)
+ {
+ case 1:
+ case 4:
+ case 5:
+ slot->maskzero = 0x200;
+ break;
+ case 3:
+ slot->maskzero = 0x100;
+ break;
+ default:
+ slot->maskzero = 0;
+ break;
+ }
+
+ switch (slot->reg_wf)
+ {
+ case 4:
+ slot->signpos = (31-8); /* sigext of (phase & 0x100) */
+ break;
+ case 0:
+ case 6:
+ case 7:
+ slot->signpos = (31-9); /* sigext of (phase & 0x200) */
+ break;
+ default:
+ slot->signpos = (31-16); /* set "neg" to zero */
+ break;
+ }
+
+ switch (slot->reg_wf)
+ {
+ case 4:
+ case 5:
+ slot->phaseshift = 1;
+ break;
+ case 6:
+ slot->phaseshift = 16; /* set phase to zero and flag for non-sin wave */
+ break;
+ case 7:
+ slot->phaseshift = 32; /* no shift (work by mod 32), but flag for non-sin wave */
+ break;
+ default:
+ slot->phaseshift = 0;
+ break;
+ }
+#endif
}
+#if OPL_FAST_WAVEGEN
+/*
+ * Fast waveform generator.
+ *
+ * Computes slot->out from the slot's phase output, its modulation input and
+ * its envelope level, driven by the per-waveform parameters cached on the
+ * slot (maskzero, signpos, phaseshift) instead of a per-waveform function.
+ */
+static void OPL3_SlotGenerate(opl3_slot *slot)
+{
+    Bit16u phase = slot->pg_phase_out + *slot->mod;
+    Bit32u neg, level;
+    Bit8u phaseshift;
+
+    /* Fast paths for mute segments */
+    if (phase & slot->maskzero)
+    {
+        slot->out = 0;
+        return;
+    }
+
+    /*
+     * signpos moves the waveform's sign bit of phase up to bit 31; neg is
+     * all ones when that bit is set and zero otherwise. It is built from an
+     * unsigned shift and subtraction so the result does not depend on how the
+     * compiler right-shifts negative signed values.
+     */
+    neg = (Bit32u)0 - (((Bit32u)phase << slot->signpos) >> 31);
+    phaseshift = slot->phaseshift;
+    level = slot->eg_out;
+
+    if (phaseshift <= 1)
+    {
+        /* Table-driven waveforms: shift the phase by 0 or 1, then look it up. */
+        phase = (Bit16u)((Bit32u)phase << phaseshift);
+        level += logsinrom[phase & 0x1ff];
+    }
+    else
+    {
+        /*
+         * Non-sin waveforms. Here phaseshift is only a flag: 16 means "use a
+         * zero phase" and any other value (32) means "leave the phase as is".
+         * Both cases are handled explicitly because shifting the promoted
+         * phase left by 32 is undefined in C, and shifting it left by 16 can
+         * overflow a signed int.
+         */
+        if (phaseshift == 16)
+        {
+            phase = 0;
+        }
+        level += ((phase ^ neg) & 0x3ff) << 3;
+    }
+
+    /*
+     * NOTE(review): with a zero phase and neg all ones, the term added above
+     * is 0x3ff << 3, so (level >> 8) reaches 32 or more once eg_out is at
+     * least 8. A shift count that large is undefined in C. Confirm the
+     * intended output for that case before changing this expression.
+     */
+    slot->out = exprom[level & 0xff] >> (level >> 8) ^ neg;
+}
+#else
static void OPL3_SlotGenerate(opl3_slot *slot)
{
    /*
     * Reference path: call the generator selected by the slot's waveform
     * register, passing the modulated phase and the current envelope output.
     */
    slot->out = (*envelope_sin[slot->reg_wf])(
        slot->pg_phase_out + *slot->mod,
        slot->eg_out);
}
+#endif
static void OPL3_SlotCalcFB(opl3_slot *slot)
{
@@ -1176,7 +1378,7 @@ void OPL3_Generate(opl3_chip *chip, Bit16s *buf)
if (chip->eg_timerrem || chip->eg_state)
{
- if (chip->eg_timer == (uint64_t)0xfffffffffU)
+ if (chip->eg_timer == 0xfffffffffULL)
{
chip->eg_timer = 0;
chip->eg_timerrem = 1;
@@ -1231,10 +1433,17 @@ void OPL3_Reset(opl3_chip *chip, Bit32u samplerate)
chip->slot[slotnum].chip = chip;
chip->slot[slotnum].mod = &chip->zeromod;
chip->slot[slotnum].eg_rout = 0x1ff;
+#if OPL_FAST_WAVEGEN
+ chip->slot[slotnum].eg_out = 0x1ff << 3;
+#else
chip->slot[slotnum].eg_out = 0x1ff;
+#endif
chip->slot[slotnum].eg_gen = envelope_gen_num_release;
chip->slot[slotnum].trem = (Bit8u*)&chip->zeromod;
chip->slot[slotnum].slot_num = slotnum;
+#if OPL_FAST_WAVEGEN
+ chip->slot[slotnum].signpos = (31-9); /* for wf=0 need use sigext of (phase & 0x200) */
+#endif
}
for (channum = 0; channum < 18; channum++)
{
@@ -1420,16 +1629,23 @@ void OPL3_GenerateStream(opl3_chip *chip, Bit16s *sndptr, Bit32u numsamples)
}
}
+/*
+ * Min / max / clamp helpers.
+ * These are plain macros: each argument may be expanded, and therefore
+ * evaluated, more than once, so pass only side-effect-free expressions.
+ * OPL3_CLAMP(V, MIN, MAX) yields V limited to the range [MIN, MAX].
+ */
+#define OPL3_MIN(A, B) (((A) > (B)) ? (B) : (A))
+#define OPL3_MAX(A, B) (((A) < (B)) ? (B) : (A))
+#define OPL3_CLAMP(V, MIN, MAX) OPL3_MAX(OPL3_MIN(V, MAX), MIN)
+
+/*
+ * Generates numsamples resampled output pairs and adds them to the data
+ * already in sndptr, two Bit16s values per iteration (presumably left and
+ * right - confirm against OPL3_GenerateResampled).
+ * Each sum is formed in a Bit32s and clamped to [INT16_MIN, INT16_MAX]
+ * before it is stored back, rather than being added in place in Bit16s.
+ */
void OPL3_GenerateStreamMix(opl3_chip *chip, Bit16s *sndptr, Bit32u numsamples)
{
Bit32u i;
Bit16s sample[2];
+ Bit32s mix[2];
for(i = 0; i < numsamples; i++)
{
OPL3_GenerateResampled(chip, sample);
- sndptr[0] += sample[0];
- sndptr[1] += sample[1];
+ /* Sum in the wider type, then clamp back into the Bit16s range. */
+ mix[0] = sndptr[0] + sample[0];
+ mix[1] = sndptr[1] + sample[1];
+ sndptr[0] = OPL3_CLAMP(mix[0], INT16_MIN, INT16_MAX);
+ sndptr[1] = OPL3_CLAMP(mix[1], INT16_MIN, INT16_MAX);
sndptr += 2;
}
}
diff --git a/src/chips/nuked/nukedopl3.h b/src/chips/nuked/nukedopl3.h
index 268e8de..9849569 100644
--- a/src/chips/nuked/nukedopl3.h
+++ b/src/chips/nuked/nukedopl3.h
@@ -33,13 +33,16 @@
#define OPL_OPL3_H
#include <inttypes.h>
+#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
+
#define OPL_WRITEBUF_SIZE 1024
#define OPL_WRITEBUF_DELAY 2
+#define OPL_FAST_WAVEGEN 1 /* optimized waveform generation */
typedef uintptr_t Bitu;
typedef intptr_t Bits;
@@ -86,6 +89,12 @@ struct _opl3_slot {
Bit32u pg_phase;
Bit16u pg_phase_out;
Bit8u slot_num;
+
+#if OPL_FAST_WAVEGEN
+ Bit16u maskzero;
+ Bit8u signpos;
+ Bit8u phaseshift;
+#endif
};
struct _opl3_channel {