diff options
| author | Vitaly Novichkov <Wohlstand@users.noreply.github.com> | 2019-02-08 11:15:04 +0300 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-02-08 11:15:04 +0300 | 
| commit | ce90247dcf72ea48a44cb1b733fc0634dbf6397b (patch) | |
| tree | c9bb52a1670e58b362a850e3727f3927399086af /src/chips | |
| parent | 307678d1831434b9565c82960aae3b97d37f71df (diff) | |
| parent | f5f850325b6aacb44d9f07dac53e55f2708833ba (diff) | |
| download | libADLMIDI-ce90247dcf72ea48a44cb1b733fc0634dbf6397b.tar.gz libADLMIDI-ce90247dcf72ea48a44cb1b733fc0634dbf6397b.tar.bz2 libADLMIDI-ce90247dcf72ea48a44cb1b733fc0634dbf6397b.zip | |
Merge pull request #201 from jpcima/nukedopl
Nuked OPL 1.8 optimizations port
Diffstat (limited to 'src/chips')
| -rw-r--r-- | src/chips/nuked/nukedopl3.c | 224 |
| -rw-r--r-- | src/chips/nuked/nukedopl3.h | 9 |
2 files changed, 229 insertions, 4 deletions
| diff --git a/src/chips/nuked/nukedopl3.c b/src/chips/nuked/nukedopl3.c index 267e67a..ef8291a 100644 --- a/src/chips/nuked/nukedopl3.c +++ b/src/chips/nuked/nukedopl3.c @@ -53,6 +53,117 @@ enum {  }; +#if OPL_FAST_WAVEGEN +/* + * logsin table + */ + +static const Bit16u logsinrom[512] = { +    0x859, 0x6c3, 0x607, 0x58b, 0x52e, 0x4e4, 0x4a6, 0x471, +    0x443, 0x41a, 0x3f5, 0x3d3, 0x3b5, 0x398, 0x37e, 0x365, +    0x34e, 0x339, 0x324, 0x311, 0x2ff, 0x2ed, 0x2dc, 0x2cd, +    0x2bd, 0x2af, 0x2a0, 0x293, 0x286, 0x279, 0x26d, 0x261, +    0x256, 0x24b, 0x240, 0x236, 0x22c, 0x222, 0x218, 0x20f, +    0x206, 0x1fd, 0x1f5, 0x1ec, 0x1e4, 0x1dc, 0x1d4, 0x1cd, +    0x1c5, 0x1be, 0x1b7, 0x1b0, 0x1a9, 0x1a2, 0x19b, 0x195, +    0x18f, 0x188, 0x182, 0x17c, 0x177, 0x171, 0x16b, 0x166, +    0x160, 0x15b, 0x155, 0x150, 0x14b, 0x146, 0x141, 0x13c, +    0x137, 0x133, 0x12e, 0x129, 0x125, 0x121, 0x11c, 0x118, +    0x114, 0x10f, 0x10b, 0x107, 0x103, 0x0ff, 0x0fb, 0x0f8, +    0x0f4, 0x0f0, 0x0ec, 0x0e9, 0x0e5, 0x0e2, 0x0de, 0x0db, +    0x0d7, 0x0d4, 0x0d1, 0x0cd, 0x0ca, 0x0c7, 0x0c4, 0x0c1, +    0x0be, 0x0bb, 0x0b8, 0x0b5, 0x0b2, 0x0af, 0x0ac, 0x0a9, +    0x0a7, 0x0a4, 0x0a1, 0x09f, 0x09c, 0x099, 0x097, 0x094, +    0x092, 0x08f, 0x08d, 0x08a, 0x088, 0x086, 0x083, 0x081, +    0x07f, 0x07d, 0x07a, 0x078, 0x076, 0x074, 0x072, 0x070, +    0x06e, 0x06c, 0x06a, 0x068, 0x066, 0x064, 0x062, 0x060, +    0x05e, 0x05c, 0x05b, 0x059, 0x057, 0x055, 0x053, 0x052, +    0x050, 0x04e, 0x04d, 0x04b, 0x04a, 0x048, 0x046, 0x045, +    0x043, 0x042, 0x040, 0x03f, 0x03e, 0x03c, 0x03b, 0x039, +    0x038, 0x037, 0x035, 0x034, 0x033, 0x031, 0x030, 0x02f, +    0x02e, 0x02d, 0x02b, 0x02a, 0x029, 0x028, 0x027, 0x026, +    0x025, 0x024, 0x023, 0x022, 0x021, 0x020, 0x01f, 0x01e, +    0x01d, 0x01c, 0x01b, 0x01a, 0x019, 0x018, 0x017, 0x017, +    0x016, 0x015, 0x014, 0x014, 0x013, 0x012, 0x011, 0x011, +    0x010, 0x00f, 0x00f, 0x00e, 0x00d, 0x00d, 0x00c, 0x00c, +    0x00b, 0x00a, 0x00a, 0x009, 0x009, 0x008, 0x008, 0x007, 
+    0x007, 0x007, 0x006, 0x006, 0x005, 0x005, 0x005, 0x004, +    0x004, 0x004, 0x003, 0x003, 0x003, 0x002, 0x002, 0x002, +    0x002, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, +    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, +    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, +    0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x002, +    0x002, 0x002, 0x002, 0x003, 0x003, 0x003, 0x004, 0x004, +    0x004, 0x005, 0x005, 0x005, 0x006, 0x006, 0x007, 0x007, +    0x007, 0x008, 0x008, 0x009, 0x009, 0x00a, 0x00a, 0x00b, +    0x00c, 0x00c, 0x00d, 0x00d, 0x00e, 0x00f, 0x00f, 0x010, +    0x011, 0x011, 0x012, 0x013, 0x014, 0x014, 0x015, 0x016, +    0x017, 0x017, 0x018, 0x019, 0x01a, 0x01b, 0x01c, 0x01d, +    0x01e, 0x01f, 0x020, 0x021, 0x022, 0x023, 0x024, 0x025, +    0x026, 0x027, 0x028, 0x029, 0x02a, 0x02b, 0x02d, 0x02e, +    0x02f, 0x030, 0x031, 0x033, 0x034, 0x035, 0x037, 0x038, +    0x039, 0x03b, 0x03c, 0x03e, 0x03f, 0x040, 0x042, 0x043, +    0x045, 0x046, 0x048, 0x04a, 0x04b, 0x04d, 0x04e, 0x050, +    0x052, 0x053, 0x055, 0x057, 0x059, 0x05b, 0x05c, 0x05e, +    0x060, 0x062, 0x064, 0x066, 0x068, 0x06a, 0x06c, 0x06e, +    0x070, 0x072, 0x074, 0x076, 0x078, 0x07a, 0x07d, 0x07f, +    0x081, 0x083, 0x086, 0x088, 0x08a, 0x08d, 0x08f, 0x092, +    0x094, 0x097, 0x099, 0x09c, 0x09f, 0x0a1, 0x0a4, 0x0a7, +    0x0a9, 0x0ac, 0x0af, 0x0b2, 0x0b5, 0x0b8, 0x0bb, 0x0be, +    0x0c1, 0x0c4, 0x0c7, 0x0ca, 0x0cd, 0x0d1, 0x0d4, 0x0d7, +    0x0db, 0x0de, 0x0e2, 0x0e5, 0x0e9, 0x0ec, 0x0f0, 0x0f4, +    0x0f8, 0x0fb, 0x0ff, 0x103, 0x107, 0x10b, 0x10f, 0x114, +    0x118, 0x11c, 0x121, 0x125, 0x129, 0x12e, 0x133, 0x137, +    0x13c, 0x141, 0x146, 0x14b, 0x150, 0x155, 0x15b, 0x160, +    0x166, 0x16b, 0x171, 0x177, 0x17c, 0x182, 0x188, 0x18f, +    0x195, 0x19b, 0x1a2, 0x1a9, 0x1b0, 0x1b7, 0x1be, 0x1c5, +    0x1cd, 0x1d4, 0x1dc, 0x1e4, 0x1ec, 0x1f5, 0x1fd, 0x206, +    0x20f, 0x218, 0x222, 0x22c, 0x236, 0x240, 0x24b, 0x256, +    0x261, 0x26d, 0x279, 0x286, 0x293, 0x2a0, 
0x2af, 0x2bd, +    0x2cd, 0x2dc, 0x2ed, 0x2ff, 0x311, 0x324, 0x339, 0x34e, +    0x365, 0x37e, 0x398, 0x3b5, 0x3d3, 0x3f5, 0x41a, 0x443, +    0x471, 0x4a6, 0x4e4, 0x52e, 0x58b, 0x607, 0x6c3, 0x859 +}; + +/* + * exp table + */ + +static const Bit16u exprom[256] = { +    0xff4, 0xfea, 0xfde, 0xfd4, 0xfc8, 0xfbe, 0xfb4, 0xfa8, +    0xf9e, 0xf92, 0xf88, 0xf7e, 0xf72, 0xf68, 0xf5c, 0xf52, +    0xf48, 0xf3e, 0xf32, 0xf28, 0xf1e, 0xf14, 0xf08, 0xefe, +    0xef4, 0xeea, 0xee0, 0xed4, 0xeca, 0xec0, 0xeb6, 0xeac, +    0xea2, 0xe98, 0xe8e, 0xe84, 0xe7a, 0xe70, 0xe66, 0xe5c, +    0xe52, 0xe48, 0xe3e, 0xe34, 0xe2a, 0xe20, 0xe16, 0xe0c, +    0xe04, 0xdfa, 0xdf0, 0xde6, 0xddc, 0xdd2, 0xdca, 0xdc0, +    0xdb6, 0xdac, 0xda4, 0xd9a, 0xd90, 0xd88, 0xd7e, 0xd74, +    0xd6a, 0xd62, 0xd58, 0xd50, 0xd46, 0xd3c, 0xd34, 0xd2a, +    0xd22, 0xd18, 0xd10, 0xd06, 0xcfe, 0xcf4, 0xcec, 0xce2, +    0xcda, 0xcd0, 0xcc8, 0xcbe, 0xcb6, 0xcae, 0xca4, 0xc9c, +    0xc92, 0xc8a, 0xc82, 0xc78, 0xc70, 0xc68, 0xc60, 0xc56, +    0xc4e, 0xc46, 0xc3c, 0xc34, 0xc2c, 0xc24, 0xc1c, 0xc12, +    0xc0a, 0xc02, 0xbfa, 0xbf2, 0xbea, 0xbe0, 0xbd8, 0xbd0, +    0xbc8, 0xbc0, 0xbb8, 0xbb0, 0xba8, 0xba0, 0xb98, 0xb90, +    0xb88, 0xb80, 0xb78, 0xb70, 0xb68, 0xb60, 0xb58, 0xb50, +    0xb48, 0xb40, 0xb38, 0xb32, 0xb2a, 0xb22, 0xb1a, 0xb12, +    0xb0a, 0xb02, 0xafc, 0xaf4, 0xaec, 0xae4, 0xade, 0xad6, +    0xace, 0xac6, 0xac0, 0xab8, 0xab0, 0xaa8, 0xaa2, 0xa9a, +    0xa92, 0xa8c, 0xa84, 0xa7c, 0xa76, 0xa6e, 0xa68, 0xa60, +    0xa58, 0xa52, 0xa4a, 0xa44, 0xa3c, 0xa36, 0xa2e, 0xa28, +    0xa20, 0xa18, 0xa12, 0xa0c, 0xa04, 0x9fe, 0x9f6, 0x9f0, +    0x9e8, 0x9e2, 0x9da, 0x9d4, 0x9ce, 0x9c6, 0x9c0, 0x9b8, +    0x9b2, 0x9ac, 0x9a4, 0x99e, 0x998, 0x990, 0x98a, 0x984, +    0x97c, 0x976, 0x970, 0x96a, 0x962, 0x95c, 0x956, 0x950, +    0x948, 0x942, 0x93c, 0x936, 0x930, 0x928, 0x922, 0x91c, +    0x916, 0x910, 0x90a, 0x904, 0x8fc, 0x8f6, 0x8f0, 0x8ea, +    0x8e4, 0x8de, 0x8d8, 0x8d2, 0x8cc, 0x8c6, 0x8c0, 0x8ba, +    0x8b4, 0x8ae, 0x8a8, 
0x8a2, 0x89c, 0x896, 0x890, 0x88a, +    0x884, 0x87e, 0x878, 0x872, 0x86c, 0x866, 0x860, 0x85a, +    0x854, 0x850, 0x84a, 0x844, 0x83e, 0x838, 0x832, 0x82c, +    0x828, 0x822, 0x81c, 0x816, 0x810, 0x80c, 0x806, 0x800 +}; +#else  /*   * logsin table   */ @@ -130,6 +241,7 @@ static const Bit16u exprom[256] = {      0x42a, 0x428, 0x425, 0x422, 0x41f, 0x41c, 0x419, 0x416,      0x414, 0x411, 0x40e, 0x40b, 0x408, 0x406, 0x403, 0x400  }; +#endif  /*   * freq mult table multiplied by 2 @@ -207,8 +319,8 @@ static const Bit16u panlawtable[] =   * Envelope generator   */ +#if !OPL_FAST_WAVEGEN  typedef Bit16s(*envelope_sinfunc)(Bit16u phase, Bit16u envelope); -typedef void(*envelope_genfunc)(opl3_slot *slott);  static Bit16s OPL3_EnvelopeCalcExp(Bit32u level)  { @@ -366,6 +478,7 @@ static const envelope_sinfunc envelope_sin[8] = {      OPL3_EnvelopeCalcSin6,      OPL3_EnvelopeCalcSin7  }; +#endif  enum envelope_gen_num  { @@ -401,6 +514,15 @@ static void OPL3_EnvelopeCalc(opl3_slot *slot)      Bit8u reset = 0;      slot->eg_out = slot->eg_rout + (slot->reg_tl << 2)                   + (slot->eg_ksl >> kslshift[slot->reg_ksl]) + *slot->trem; + +#if OPL_FAST_WAVEGEN +    if (slot->eg_out > 0x1ff) +    { +        slot->eg_out = 0x1ff; +    } +    slot->eg_out <<= 3; +#endif +      if (slot->key && slot->eg_gen == envelope_gen_num_release)      {          reset = 1; @@ -687,12 +809,92 @@ static void OPL3_SlotWriteE0(opl3_slot *slot, Bit8u data)      {          slot->reg_wf &= 0x03;      } + +#if OPL_FAST_WAVEGEN +    switch (slot->reg_wf) +    { +    case 1: +    case 4: +    case 5: +        slot->maskzero = 0x200; +        break; +    case 3: +        slot->maskzero = 0x100; +        break; +    default: +        slot->maskzero = 0; +        break; +    } + +    switch (slot->reg_wf) +    { +    case 4: +        slot->signpos = (31-8);  /* sigext of (phase & 0x100) */ +        break; +    case 0: +    case 6: +    case 7: +        slot->signpos = (31-9);  /* sigext of (phase & 
0x200) */ +        break; +    default: +        slot->signpos = (31-16);  /* set "neg" to zero */ +        break; +    } + +    switch (slot->reg_wf) +    { +    case 4: +    case 5: +        slot->phaseshift = 1; +        break; +    case 6: +        slot->phaseshift = 16; /* set phase to zero and flag for non-sin wave */ +        break; +    case 7: +        slot->phaseshift = 32; /* no shift (work by mod 32), but flag for non-sin wave */ +        break; +    default: +        slot->phaseshift = 0; +        break; +    } +#endif  } +#if OPL_FAST_WAVEGEN +static void OPL3_SlotGenerate(opl3_slot *slot) +{ +    Bit16u phase = slot->pg_phase_out + *slot->mod; +    Bit32u neg, level; +    Bit8u  phaseshift; + +    /* Fast paths for mute segments */ +    if (phase & slot->maskzero) +    { +        slot->out = 0; +        return; +    } + +    neg = (Bit32s)((Bit32u)phase << slot->signpos) >> 31; +    phaseshift = slot->phaseshift; +    level = slot->eg_out; + +    phase <<= phaseshift; +    if (phaseshift <= 1) +    { +        level += logsinrom[phase & 0x1ff]; +    } +    else +    { +        level += ((phase ^ neg) & 0x3ff) << 3; +    } +    slot->out = exprom[level & 0xff] >> (level >> 8) ^ neg; +} +#else  static void OPL3_SlotGenerate(opl3_slot *slot)  {      slot->out = envelope_sin[slot->reg_wf](slot->pg_phase_out + *slot->mod, slot->eg_out);  } +#endif  static void OPL3_SlotCalcFB(opl3_slot *slot)  { @@ -1176,7 +1378,7 @@ void OPL3_Generate(opl3_chip *chip, Bit16s *buf)      if (chip->eg_timerrem || chip->eg_state)      { -        if (chip->eg_timer == (uint64_t)0xfffffffffU) +        if (chip->eg_timer == 0xfffffffffULL)          {              chip->eg_timer = 0;              chip->eg_timerrem = 1; @@ -1231,10 +1433,17 @@ void OPL3_Reset(opl3_chip *chip, Bit32u samplerate)          chip->slot[slotnum].chip = chip;          chip->slot[slotnum].mod = &chip->zeromod;          chip->slot[slotnum].eg_rout = 0x1ff; +#if OPL_FAST_WAVEGEN +        
chip->slot[slotnum].eg_out = 0x1ff << 3; +#else          chip->slot[slotnum].eg_out = 0x1ff; +#endif          chip->slot[slotnum].eg_gen = envelope_gen_num_release;          chip->slot[slotnum].trem = (Bit8u*)&chip->zeromod;          chip->slot[slotnum].slot_num = slotnum; +#if OPL_FAST_WAVEGEN +        chip->slot[slotnum].signpos = (31-9);  /* for wf=0 need use sigext of (phase & 0x200) */ +#endif      }      for (channum = 0; channum < 18; channum++)      { @@ -1420,16 +1629,23 @@ void OPL3_GenerateStream(opl3_chip *chip, Bit16s *sndptr, Bit32u numsamples)      }  } +#define OPL3_MIN(A, B)          (((A) > (B)) ? (B) : (A)) +#define OPL3_MAX(A, B)          (((A) < (B)) ? (B) : (A)) +#define OPL3_CLAMP(V, MIN, MAX) OPL3_MAX(OPL3_MIN(V, MAX), MIN) +  void OPL3_GenerateStreamMix(opl3_chip *chip, Bit16s *sndptr, Bit32u numsamples)  {      Bit32u i;      Bit16s sample[2]; +    Bit32s mix[2];      for(i = 0; i < numsamples; i++)      {          OPL3_GenerateResampled(chip, sample); -        sndptr[0] += sample[0]; -        sndptr[1] += sample[1]; +        mix[0] = sndptr[0] + sample[0]; +        mix[1] = sndptr[1] + sample[1]; +        sndptr[0] = OPL3_CLAMP(mix[0], INT16_MIN, INT16_MAX); +        sndptr[1] = OPL3_CLAMP(mix[1], INT16_MIN, INT16_MAX);          sndptr += 2;      }  } diff --git a/src/chips/nuked/nukedopl3.h b/src/chips/nuked/nukedopl3.h index 268e8de..9849569 100644 --- a/src/chips/nuked/nukedopl3.h +++ b/src/chips/nuked/nukedopl3.h @@ -33,13 +33,16 @@  #define OPL_OPL3_H  #include <inttypes.h> +#include <stdint.h>  #ifdef __cplusplus  extern "C" {  #endif +  #define OPL_WRITEBUF_SIZE   1024  #define OPL_WRITEBUF_DELAY  2 +#define OPL_FAST_WAVEGEN    1 /* optimized waveform generation */  typedef uintptr_t       Bitu;  typedef intptr_t        Bits; @@ -86,6 +89,12 @@ struct _opl3_slot {      Bit32u pg_phase;      Bit16u pg_phase_out;      Bit8u slot_num; + +#if OPL_FAST_WAVEGEN +    Bit16u maskzero; +    Bit8u  signpos; +    Bit8u  phaseshift; +#endif  
};  struct _opl3_channel { |