diff --git a/Makefile b/Makefile index e796073c28..bbddf7ff0c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ # Tvheadend streaming server. # Copyright (C) 2007-2009 Andreas Ă–man # Copyright (C) 2012-2015 Adam Sutton -# Copyright (C) 2012-2017 Jaroslav Kysela +# Copyright (C) 2012-2018 Jaroslav Kysela # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -603,22 +603,6 @@ SRCS-TSDEBUG = \ SRCS-${CONFIG_TSDEBUG} += $(SRCS-TSDEBUG) I18N-C += $(SRCS-TSDEBUG) -# FFdecsa -ifneq ($(CONFIG_DVBCSA),yes) -FFDECSA-$(CONFIG_CAPMT) = yes -FFDECSA-$(CONFIG_CWC) = yes -FFDECSA-$(CONFIG_CONSTCW) = yes -endif - -ifeq ($(FFDECSA-yes),yes) -SRCS-yes += src/descrambler/ffdecsa/ffdecsa_interface.c \ - src/descrambler/ffdecsa/ffdecsa_int.c -SRCS-${CONFIG_MMX} += src/descrambler/ffdecsa/ffdecsa_mmx.c -SRCS-${CONFIG_SSE2} += src/descrambler/ffdecsa/ffdecsa_sse2.c -${BUILDDIR}/src/descrambler/ffdecsa/ffdecsa_mmx.o : CFLAGS += -mmmx -${BUILDDIR}/src/descrambler/ffdecsa/ffdecsa_sse2.o : CFLAGS += -msse2 -endif - # crypto algorithms SRCS-${CONFIG_SSL} += src/descrambler/algo/libaesdec.c SRCS-${CONFIG_SSL} += src/descrambler/algo/libaes128dec.c diff --git a/configure b/configure index f13864a9c7..8f430ea049 100755 --- a/configure +++ b/configure @@ -66,7 +66,6 @@ OPTIONS=( "tvhcsa:auto" "bundle:no" "pngquant:no" - "dvbcsa:no" "kqueue:no" "dbus_1:auto" "android:no" @@ -648,10 +647,11 @@ fi # if enabled cwc || enabled cccam || enabled capmt || enabled constcw; then enable tvhcsa + enable dvbcsa if enabled dvbcsa; then (check_cc_header "dvbcsa/dvbcsa" dvbcsa_h &&\ check_cc_lib dvbcsa dvbcsa_l) ||\ - die "Failed to find dvbcsa support (use --disable-dvbcsa)" + die "Failed to find dvbcsa library" LDFLAGS="$LDFLAGS -ldvbcsa" fi fi diff --git a/src/descrambler/ffdecsa/FFdecsa.c b/src/descrambler/ffdecsa/FFdecsa.c deleted file mode 100644 index ce0c28a1f9..0000000000 --- 
a/src/descrambler/ffdecsa/FFdecsa.c +++ /dev/null @@ -1,901 +0,0 @@ -/* FFdecsa -- fast decsa algorithm - * - * Copyright (C) 2003-2004 fatih89r - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - - -#include -#include -#include -#include - -#ifndef NULL -#define NULL 0 -#endif - -//#define DEBUG -#ifdef DEBUG -#define DBG(a) a -#else -#define DBG(a) -#endif - -//// parallelization stuff, large speed differences are possible -// possible choices -#define PARALLEL_32_4CHAR 320 -#define PARALLEL_32_4CHARA 321 -#define PARALLEL_32_INT 322 -#define PARALLEL_64_8CHAR 640 -#define PARALLEL_64_8CHARA 641 -#define PARALLEL_64_2INT 642 -#define PARALLEL_64_LONG 643 -#define PARALLEL_64_MMX 644 -#define PARALLEL_128_16CHAR 1280 -#define PARALLEL_128_16CHARA 1281 -#define PARALLEL_128_4INT 1282 -#define PARALLEL_128_2LONG 1283 -#define PARALLEL_128_2MMX 1284 -#define PARALLEL_128_SSE 1285 -#define PARALLEL_128_SSE2 1286 - -#include "parallel_generic.h" -//// conditionals -#if PARALLEL_MODE==PARALLEL_32_4CHAR -#include "parallel_032_4char.h" -#elif PARALLEL_MODE==PARALLEL_32_4CHARA -#include "parallel_032_4charA.h" -#elif PARALLEL_MODE==PARALLEL_32_INT -#include "parallel_032_int.h" -#define FUNC(x) (x ## _32int) -#elif PARALLEL_MODE==PARALLEL_64_8CHAR -#include "parallel_064_8char.h" -#elif 
PARALLEL_MODE==PARALLEL_64_8CHARA -#include "parallel_064_8charA.h" -#elif PARALLEL_MODE==PARALLEL_64_2INT -#include "parallel_064_2int.h" -#elif PARALLEL_MODE==PARALLEL_64_LONG -#include "parallel_064_long.h" -#elif PARALLEL_MODE==PARALLEL_64_MMX -#include "parallel_064_mmx.h" -#define FUNC(x) (x ## _64mmx) -#elif PARALLEL_MODE==PARALLEL_128_16CHAR -#include "parallel_128_16char.h" -#elif PARALLEL_MODE==PARALLEL_128_16CHARA -#include "parallel_128_16charA.h" -#elif PARALLEL_MODE==PARALLEL_128_4INT -#include "parallel_128_4int.h" -#elif PARALLEL_MODE==PARALLEL_128_2LONG -#include "parallel_128_2long.h" -#elif PARALLEL_MODE==PARALLEL_128_2MMX -#include "parallel_128_2mmx.h" -#elif PARALLEL_MODE==PARALLEL_128_SSE -#include "parallel_128_sse.h" -#elif PARALLEL_MODE==PARALLEL_128_SSE2 -#include "parallel_128_sse2.h" -#define FUNC(x) (x ## _128sse2) -#else -#error "unknown/undefined parallel mode" -#endif - - -// stuff depending on conditionals - -#define BYTES_PER_GROUP (GROUP_PARALLELISM/8) -#define BYPG BYTES_PER_GROUP -#define BITS_PER_GROUP GROUP_PARALLELISM -#define BIPG BITS_PER_GROUP - -#ifndef MALLOC -#define MALLOC(X) malloc(X) -#endif -#ifndef FREE -#define FREE(X) free(X) -#endif -#ifndef MEMALIGN -#define MEMALIGN -#endif - -//// debug tool - -#if 0 -static void dump_mem(const char *string, const unsigned char *p, int len, int linelen){ - int i; - for(i=0;i>4)&0xf; - iA[1]=(ck[0] )&0xf; - iA[2]=(ck[1]>>4)&0xf; - iA[3]=(ck[1] )&0xf; - iA[4]=(ck[2]>>4)&0xf; - iA[5]=(ck[2] )&0xf; - iA[6]=(ck[3]>>4)&0xf; - iA[7]=(ck[3] )&0xf; - iB[0]=(ck[4]>>4)&0xf; - iB[1]=(ck[4] )&0xf; - iB[2]=(ck[5]>>4)&0xf; - iB[3]=(ck[5] )&0xf; - iB[4]=(ck[6]>>4)&0xf; - iB[5]=(ck[6] )&0xf; - iB[6]=(ck[7]>>4)&0xf; - iB[7]=(ck[7] )&0xf; -} - -//----- stream main function - -#define STREAM_INIT -#include "stream.c" -#undef STREAM_INIT - -#define STREAM_NORMAL -#include "stream.c" -#undef STREAM_NORMAL - - -//-----block decypher - -//-----key schedule for block decypher - -static void 
key_schedule_block( - unsigned char *ck, // [In] ck[0]-ck[7] 8 bytes | Key. - unsigned char *kk) // [Out] kk[0]-kk[55] 56 bytes | Key schedule. -{ - static const unsigned char key_perm[0x40] = { - 0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40, - 0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29, - 0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11, - 0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37, - }; - - int i,j,k; - int bit[64]; - int newbit[64]; - int kb[7][8]; - - // 56 steps - // 56 key bytes kk(55)..kk(0) by key schedule from ck - - // kb(6,0) .. kb(6,7) = ck(0) .. ck(7) - kb[6][0] = ck[0]; - kb[6][1] = ck[1]; - kb[6][2] = ck[2]; - kb[6][3] = ck[3]; - kb[6][4] = ck[4]; - kb[6][5] = ck[5]; - kb[6][6] = ck[6]; - kb[6][7] = ck[7]; - - // calculate kb[5] .. kb[0] - for(i=5; i>=0; i--){ - // 64 bit perm on kb - for(j=0; j<8; j++){ - for(k=0; k<8; k++){ - bit[j*8+k] = (kb[i+1][j] >> (7-k)) & 1; - newbit[key_perm[j*8+k]-1] = bit[j*8+k]; - } - } - for(j=0; j<8; j++){ - kb[i][j] = 0; - for(k=0; k<8; k++){ - kb[i][j] |= newbit[j*8+k] << (7-k); - } - } - } - - // xor to give kk - for(i=0; i<7; i++){ - for(j=0; j<8; j++){ - kk[i*8+j] = kb[i][j] ^ i; - } - } - -} - -//-----block utils - -static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){ - int *ri=(int *)in; - int *ibi=(int *)out; - int j,i,k,g; - // copy and first step - for(g=0;g>16) | (b&0xffff0000) ; - } - } - } -//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); -// now 01010101 - for(j=0;j<8;j+=2){ - for(i=0;i<1;i++){ - for(k=0;k>8) | (b&0xff00ff00); - } - } - } -//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); -// now 00000000 -} - -static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){ - int *ri=(int *)in; - int *bdi=(int 
*)out; - int j,i,k,g; -#define INTS_PER_ROW (GROUP_PARALLELISM/8*2) -//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); -// now 00000000 - for(j=0;j<8;j+=2){ - for(i=0;i<1;i++){ - for(k=0;k>8) | (b&0xff00ff00); - } - } - } -//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); -// now 01010101 - for(j=0;j<8;j+=4){ - for(i=0;i<2;i++){ - for(k=0;k>16) | (b&0xffff0000) ; - } - } - } -//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM); -// now 01230123 - for(g=0;g=0;i--){ - { - MEMALIGN batch tkkmulti=kkmulti[i]; - batch *si=(batch *)sbox_in; - batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6); - for(g=0;gck,pk,8); -// precalculations for stream - key_schedule_stream(key->ck,key->iA,key->iB); - for(by=0;by<8;by++){ - for(bi=0;bi<8;bi++){ - key->ck_g[by][bi]=(key->ck[by]&(1<iA_g[by][bi]=(key->iA[by]&(1<iB_g[by][bi]=(key->iB[by]&(1<ck,key->kk); - for(i=0;i<56;i++){ - for(j=0;jkkmulti[i])+j)=key->kk[i]; - } - } -} - -extern void FUNC(set_control_words)(void *keys, const unsigned char *ev, const unsigned char *od); - -void FUNC(set_control_words)(void *keys, const unsigned char *ev, const unsigned char *od) -{ - schedule_key(&((struct csa_keys_t *)keys)->even,ev); - schedule_key(&((struct csa_keys_t *)keys)->odd,od); -} - -extern void FUNC(set_even_control_word)(void *keys, const unsigned char *pk); - -void FUNC(set_even_control_word)(void *keys, const unsigned char *pk) -{ - schedule_key(&((struct csa_keys_t *)keys)->even,pk); -} - -extern void FUNC(set_odd_control_word)(void *keys, const unsigned char *pk); - -void FUNC(set_odd_control_word)(void *keys, const unsigned char *pk){ - schedule_key(&((struct csa_keys_t *)keys)->odd,pk); -} - -//-----get internal parallelism - -extern int FUNC(get_internal_parallelism)(void); - -int FUNC(get_internal_parallelism)(void) -{ - return GROUP_PARALLELISM; -} - -//-----get suggested cluster size - -extern int FUNC(get_suggested_cluster_size)(void); - -int 
FUNC(get_suggested_cluster_size)(void) -{ - int r; - r=GROUP_PARALLELISM+GROUP_PARALLELISM/10; - if(reven.ck,8); - memcpy(odd,&((struct csa_keys_t *)keys)->odd.ck,8); -} -#endif - -//----- decrypt - -extern int FUNC(decrypt_packets)(void *keys, unsigned char **cluster); -int FUNC(decrypt_packets)(void *keys, unsigned char **cluster) -{ - // statistics, currently unused - int stat_no_scramble=0; - int stat_reserved=0; - int stat_decrypted[2]={0,0}; - int stat_decrypted_mini=0; - unsigned char **clst; - unsigned char **clst2; - int grouped; - int group_ev_od; - int advanced; - int can_advance; - unsigned char *g_pkt[GROUP_PARALLELISM]; - int g_len[GROUP_PARALLELISM]; - int g_offset[GROUP_PARALLELISM]; - int g_n[GROUP_PARALLELISM]; - int g_residue[GROUP_PARALLELISM]; - unsigned char *pkt; - int xc0,ev_od,len,offset,n,residue; - struct csa_key_t* k; - int i,j,iter,g; - int t23,tsmall; - int alive[24]; -//icc craziness int pad1=0; //////////align! FIXME - unsigned char *encp[GROUP_PARALLELISM]; - MEMALIGN unsigned char stream_in[GROUP_PARALLELISM*8]; - MEMALIGN unsigned char stream_out[GROUP_PARALLELISM*8]; - MEMALIGN unsigned char ib[GROUP_PARALLELISM*8]; - MEMALIGN unsigned char block_out[GROUP_PARALLELISM*8]; - struct stream_regs regs; - -//icc craziness i=(int)&pad1;//////////align!!! 
FIXME - - // build a list of packets to be processed - clst=cluster; - grouped=0; - advanced=0; - can_advance=1; - group_ev_od=-1; // silence incorrect compiler warning - pkt=*clst; - do{ // find a new packet - if(grouped==GROUP_PARALLELISM){ - // full - break; - } - if(pkt==NULL){ - // no more ranges - break; - } - if(pkt>=*(clst+1)){ - // out of this range, try next - clst++;clst++; - pkt=*clst; - continue; - } - - do{ // handle this packet - xc0=pkt[3]&0xc0; - DBG(fprintf(stderr," exam pkt=%p, xc0=%02x, can_adv=%i\n",pkt,xc0,can_advance)); - if(xc0==0x00){ - DBG(fprintf(stderr,"skip clear pkt %p (can_advance is %i)\n",pkt,can_advance)); - advanced+=can_advance; - stat_no_scramble++; - break; - } - if(xc0==0x40){ - DBG(fprintf(stderr,"skip reserved pkt %p (can_advance is %i)\n",pkt,can_advance)); - advanced+=can_advance; - stat_reserved++; - break; - } - if(xc0==0x80||xc0==0xc0){ // encrypted - ev_od=(xc0&0x40)>>6; // 0 even, 1 odd - if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd) - if(group_ev_od==ev_od){ // could be added to group - pkt[3]&=0x3f; // consider it decrypted now - if(pkt[3]&0x20){ // incomplete packet - offset=4+pkt[4]+1; - len=188-offset; - n=len>>3; - residue=len-(n<<3); - if(n==0){ // decrypted==encrypted! - DBG(fprintf(stderr,"DECRYPTED MINI! 
(can_advance is %i)\n",can_advance)); - advanced+=can_advance; - stat_decrypted_mini++; - break; // this doesn't need more processing - } - }else{ - len=184; - offset=4; - n=23; - residue=0; - } - g_pkt[grouped]=pkt; - g_len[grouped]=len; - g_offset[grouped]=offset; - g_n[grouped]=n; - g_residue[grouped]=residue; - DBG(fprintf(stderr,"%2i: eo=%i pkt=%p len=%03i n=%2i residue=%i\n",grouped,ev_od,pkt,len,n,residue)); - grouped++; - advanced+=can_advance; - stat_decrypted[ev_od]++; - } - else{ - can_advance=0; - DBG(fprintf(stderr,"skip pkt %p and can_advance set to 0\n",pkt)); - break; // skip and go on - } - } - } while(0); - - if(can_advance){ - // move range start forward - *clst+=188; - } - // next packet, if there is one - pkt+=188; - } while(1); - DBG(fprintf(stderr,"-- result: grouped %i pkts, advanced %i pkts\n",grouped,advanced)); - - // delete empty ranges and compact list - clst2=cluster; - for(clst=cluster;*clst!=NULL;clst+=2){ - // if not empty - if(*clst<*(clst+1)){ - // it will remain - *clst2=*clst; - *(clst2+1)=*(clst+1); - clst2+=2; - } - } - *clst2=NULL; - - if(grouped==0){ - // no processing needed - return advanced; - } - - // sort them, longest payload first - // we expect many n=23 packets and a few n<23 - DBG(fprintf(stderr,"PRESORTING\n")); - for(i=0;i=0;tsmall--){ - if(g_n[tsmall]==23) break; - } -DBG(fprintf(stderr,"tsmall after for =%i\n",tsmall)); - - if(tsmall-t23<1) break; - -DBG(fprintf(stderr,"swap t23=%i,tsmall=%i\n",t23,tsmall)); - - g_swap(t23,tsmall); - - t23++; - tsmall--; -DBG(fprintf(stderr,"new t23=%i,tsmall=%i\n\n",t23,tsmall)); - } - DBG(fprintf(stderr,"packets with n=23, t23=%i grouped=%i\n",t23,grouped)); - DBG(fprintf(stderr,"MIDSORTING\n")); - for(i=0;ig_n[i]){ - g_swap(i,j); - } - } - } - DBG(fprintf(stderr,"POSTSORTING\n")); - for(i=0;i=0;i--){ - alive[i]+=alive[i+1]; - } - DBG(fprintf(stderr,"ALIVE\n")); - for(i=0;i<=23;i++){ - DBG(fprintf(stderr,"alive%2i=%i\n",i,alive[i])); - } - - // choose key - 
if(group_ev_od==0){ - k=&((struct csa_keys_t *)keys)->even; - } - else{ - k=&((struct csa_keys_t *)keys)->odd; - } - - //INIT -//#define INITIALIZE_UNUSED_INPUT -#ifdef INITIALIZE_UNUSED_INPUT -// unnecessary zeroing. -// without this, we operate on uninitialized memory -// when grouped>>>>ITER 0\n")); - iter=0; - stream_cypher_group_init(®s,k->iA_g,k->iB_g,stream_in); - // fill first ib - for(g=0;g0;iter++){ -DBG(fprintf(stderr,">>>>>ITER %i\n",iter)); - // alive and just dead packets: calc block - block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]); -DBG(dump_mem("BLO_ib ",block_out,8*alive[iter-1],8)); - // all packets (dead too): calc stream - stream_cypher_group_normal(®s,stream_out); -//dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG); - - // alive packets: calc ib - for(g=0;g>>>>ITER 23\n")); - iter=23; - // calc block - block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]); -DBG(dump_mem("23BLO_ib ",block_out,8*alive[iter-1],8)); - // just dead packets: write decrypted data - for(g=alive[iter];g. 
- */ -#include "config.h" -#include "tvheadend.h" -#include "FFdecsa.h" - - - -typedef struct { - int (*get_internal_parallelism)(void); - int (*get_suggested_cluster_size)(void); - void *(*get_key_struct)(void); - void (*free_key_struct)(void *keys); - void (*set_control_words)(void *keys, const unsigned char *even, const unsigned char *odd); - - void (*set_even_control_word)(void *keys, const unsigned char *even); - void (*set_odd_control_word)(void *keys, const unsigned char *odd); - int (*decrypt_packets)(void *keys, unsigned char **cluster); - -} csafuncs_t; - - -#define MAKEFUNCS(x) \ -extern int get_internal_parallelism_##x(void);\ -extern int get_suggested_cluster_size_##x(void);\ -extern void *get_key_struct_##x(void);\ -extern void free_key_struct_##x(void *keys);\ -extern void set_control_words_##x(void *keys, const unsigned char *even, const unsigned char *odd);\ -extern void set_even_control_word_##x(void *keys, const unsigned char *even);\ -extern void set_odd_control_word_##x(void *keys, const unsigned char *odd);\ -extern int decrypt_packets_##x(void *keys, unsigned char **cluster);\ -static csafuncs_t funcs_##x = { \ - &get_internal_parallelism_##x,\ - &get_suggested_cluster_size_##x,\ - &get_key_struct_##x,\ - &free_key_struct_##x,\ - &set_control_words_##x,\ - &set_even_control_word_##x,\ - &set_odd_control_word_##x,\ - &decrypt_packets_##x\ -}; - -MAKEFUNCS(32int); -#ifdef CONFIG_MMX -MAKEFUNCS(64mmx); -#endif - -#ifdef CONFIG_SSE2 -MAKEFUNCS(128sse2); -#endif - -static csafuncs_t current; - - - - -#if defined(__x86_64__) -# define REG_a "rax" -# define REG_b "rbx" -# define REG_c "rcx" -# define REG_d "rdx" -# define REG_D "rdi" -# define REG_S "rsi" -# define PTR_SIZE "8" -typedef int64_t x86_reg; - -# define REG_SP "rsp" -# define REG_BP "rbp" -# define REGBP rbp -# define REGa rax -# define REGb rbx -# define REGc rcx -# define REGd rdx -# define REGSP rsp - -#elif defined(__i386__) - -# define REG_a "eax" -# define REG_b "ebx" -# define 
REG_c "ecx" -# define REG_d "edx" -# define REG_D "edi" -# define REG_S "esi" -# define PTR_SIZE "4" -typedef int32_t x86_reg; - -# define REG_SP "esp" -# define REG_BP "ebp" -# define REGBP ebp -# define REGa eax -# define REGb ebx -# define REGc ecx -# define REGd edx -# define REGSP esp -#else -typedef int x86_reg; -#endif - -#if defined(__i386__) || defined(__x86_64__) -static inline void -native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* saving ebx is necessary for PIC compatibility */ - asm volatile("mov %%"REG_b", %%"REG_S"\n\t" - "cpuid\n\t" - "xchg %%"REG_b", %%"REG_S - : "=a" (*eax), - "=S" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} -#endif - -void -ffdecsa_init(void) -{ - current = funcs_32int; - - -#if defined(__i386__) || defined(__x86_64__) - - unsigned int eax, ebx, ecx, edx; - unsigned int max_std_level, std_caps; - -#if defined(__i386__) - - x86_reg a, c; - __asm__ volatile ( - /* See if CPUID instruction is supported ... */ - /* ... Get copies of EFLAGS into eax and ecx */ - "pushfl\n\t" - "pop %0\n\t" - "mov %0, %1\n\t" - - /* ... Toggle the ID bit in one copy and store */ - /* to the EFLAGS reg */ - "xor $0x200000, %0\n\t" - "push %0\n\t" - "popfl\n\t" - - /* ... 
Get the (hopefully modified) EFLAGS */ - "pushfl\n\t" - "pop %0\n\t" - : "=a" (a), "=c" (c) - : - : "cc" - ); - - if (a != c) { -#endif - eax = ebx = ecx = edx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - max_std_level = eax; - - if(max_std_level >= 1){ - eax = 1; - native_cpuid(&eax, &ebx, &ecx, &edx); - std_caps = edx; - -#ifdef CONFIG_SSE2 - if (std_caps & (1<<26)) { - current = funcs_128sse2; - tvhinfo(LS_CSA, "Using SSE2 128bit parallel descrambling"); - return; - } -#endif - -#ifdef CONFIG_MMX - if (std_caps & (1<<23)) { - current = funcs_64mmx; - tvhinfo(LS_CSA, "Using MMX 64bit parallel descrambling"); - return; - } -#endif - } -#if defined(__i386__) - } -#endif -#endif - - tvhinfo(LS_CSA, "Using 32bit parallel descrambling"); -} - - -int -get_internal_parallelism(void) -{ - return current.get_internal_parallelism(); -} -int -get_suggested_cluster_size(void) -{ - return current.get_suggested_cluster_size(); -} - -void * -get_key_struct(void) -{ - return current.get_key_struct(); -} -void -free_key_struct(void *keys) -{ - current.free_key_struct(keys); -} - -void -set_even_control_word(void *keys, const unsigned char *even) -{ - current.set_even_control_word(keys, even); -} - -void -set_odd_control_word(void *keys, const unsigned char *odd) -{ - current.set_odd_control_word(keys, odd); -} - -int -decrypt_packets(void *keys, unsigned char **cluster) -{ - return current.decrypt_packets(keys, cluster); -} diff --git a/src/descrambler/ffdecsa/ffdecsa_mmx.c b/src/descrambler/ffdecsa/ffdecsa_mmx.c deleted file mode 100644 index bd6452c410..0000000000 --- a/src/descrambler/ffdecsa/ffdecsa_mmx.c +++ /dev/null @@ -1,2 +0,0 @@ -#define PARALLEL_MODE PARALLEL_64_MMX -#include "FFdecsa.c" diff --git a/src/descrambler/ffdecsa/ffdecsa_sse2.c b/src/descrambler/ffdecsa/ffdecsa_sse2.c deleted file mode 100644 index b24a1e83ab..0000000000 --- a/src/descrambler/ffdecsa/ffdecsa_sse2.c +++ /dev/null @@ -1,2 +0,0 @@ -#define PARALLEL_MODE PARALLEL_128_SSE2 -#include "FFdecsa.c" 
diff --git a/src/descrambler/ffdecsa/fftable.h b/src/descrambler/ffdecsa/fftable.h deleted file mode 100644 index bd4fd242f7..0000000000 --- a/src/descrambler/ffdecsa/fftable.h +++ /dev/null @@ -1,56 +0,0 @@ -/* FFdecsa -- fast decsa algorithm - * - * Copyright (C) 2007 Dark Avenger - * 2003-2004 fatih89r - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef FFTABLE_H -#define FFTABLE_H - -void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data) -{ -#if 0 - *(((int *)tab)+2*g)=*((int *)data); - *(((int *)tab)+2*g+1)=*(((int *)data)+1); -#else - *(((long long *)tab)+g)=*((long long *)data); -#endif -} - -void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g) -{ -#if 1 - *((int *)data)=*(((int *)tab)+2*g); - *(((int *)data)+1)=*(((int *)tab)+2*g+1); -#else - *((long long *)data)=*(((long long *)tab)+g); -#endif -} - -void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g) -{ - int j; - for(j=0;j=4?32-1:0))+j); - } -} - -typedef unsigned int batch; -#define BYTES_PER_BATCH 4 -#define B_FFN_ALL_29() 0x29292929 -#define B_FFN_ALL_02() 0x02020202 -#define B_FFN_ALL_04() 0x04040404 -#define B_FFN_ALL_10() 0x10101010 -#define B_FFN_ALL_40() 0x40404040 -#define B_FFN_ALL_80() 0x80808080 - -#define M_EMPTY() diff 
--git a/src/descrambler/ffdecsa/parallel_064_mmx.h b/src/descrambler/ffdecsa/parallel_064_mmx.h deleted file mode 100644 index f86a926f68..0000000000 --- a/src/descrambler/ffdecsa/parallel_064_mmx.h +++ /dev/null @@ -1,83 +0,0 @@ -/* FFdecsa -- fast decsa algorithm - * - * Copyright (C) 2007 Dark Avenger - * 2003-2004 fatih89r - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include - -#define MEMALIGN __attribute__((aligned(16))) - -union __u64 { - unsigned int u[2]; - __m64 v; -}; - -static const union __u64 ff0 = {{0x00000000U, 0x00000000U}}; -static const union __u64 ff1 = {{0xffffffffU, 0xffffffffU}}; - -typedef __m64 group; -#define GROUP_PARALLELISM 64 -#define FF0() ff0.v -#define FF1() ff1.v -#define FFAND(a,b) _mm_and_si64((a),(b)) -#define FFOR(a,b) _mm_or_si64((a),(b)) -#define FFXOR(a,b) _mm_xor_si64((a),(b)) -#define FFNOT(a) _mm_xor_si64((a),FF1()) - -/* 64 rows of 64 bits */ - -static const union __u64 ff29 = {{0x29292929U, 0x29292929U}}; -static const union __u64 ff02 = {{0x02020202U, 0x02020202U}}; -static const union __u64 ff04 = {{0x04040404U, 0x04040404U}}; -static const union __u64 ff10 = {{0x10101010U, 0x10101010U}}; -static const union __u64 ff40 = {{0x40404040U, 0x40404040U}}; -static const union __u64 ff80 = {{0x80808080U, 0x80808080U}}; - -typedef __m64 batch; 
-#define BYTES_PER_BATCH 8 -#define B_FFAND(a,b) FFAND((a),(b)) -#define B_FFOR(a,b) FFOR((a),(b)) -#define B_FFXOR(a,b) FFXOR((a),(b)) -#define B_FFN_ALL_29() ff29.v -#define B_FFN_ALL_02() ff02.v -#define B_FFN_ALL_04() ff04.v -#define B_FFN_ALL_10() ff10.v -#define B_FFN_ALL_40() ff40.v -#define B_FFN_ALL_80() ff80.v -#define B_FFSH8L(a,n) _mm_slli_si64((a),(n)) -#define B_FFSH8R(a,n) _mm_srli_si64((a),(n)) - -#define M_EMPTY() _mm_empty() - - -#undef XOR_8_BY -#define XOR_8_BY(d,s1,s2) do { *(__m64*)d = _mm_xor_si64(*(__m64*)(s1), *(__m64*)(s2)); } while(0) - -#undef XOREQ_8_BY -#define XOREQ_8_BY(d,s) XOR_8_BY(d, d, s) - -#undef COPY_8_BY -#define COPY_8_BY(d,s) do { *(__m64 *)(d) = *(__m64 *)(s); } while(0) - -#undef BEST_SPAN -#define BEST_SPAN 8 - -#undef XOR_BEST_BY -#define XOR_BEST_BY(d,s1,s2) XOR_8_BY(d,s1,s2) - -#include "fftable.h" diff --git a/src/descrambler/ffdecsa/parallel_128_sse2.h b/src/descrambler/ffdecsa/parallel_128_sse2.h deleted file mode 100644 index 1e714af7df..0000000000 --- a/src/descrambler/ffdecsa/parallel_128_sse2.h +++ /dev/null @@ -1,82 +0,0 @@ -/* FFdecsa -- fast decsa algorithm - * - * Copyright (C) 2007 Dark Avenger - * 2003-2004 fatih89r - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include - -#define MEMALIGN __attribute__((aligned(16))) - -union __u128i { - unsigned int u[4]; - __m128i v; -}; - -static const union __u128i ff0 = {{0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U}}; -static const union __u128i ff1 = {{0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU}}; - -typedef __m128i group; -#define GROUP_PARALLELISM 128 -#define FF0() ff0.v -#define FF1() ff1.v -#define FFAND(a,b) _mm_and_si128((a),(b)) -#define FFOR(a,b) _mm_or_si128((a),(b)) -#define FFXOR(a,b) _mm_xor_si128((a),(b)) -#define FFNOT(a) _mm_xor_si128((a),FF1()) -#define MALLOC(X) _mm_malloc(X,16) -#define FREE(X) _mm_free(X) - -/* BATCH */ - -static const union __u128i ff29 = {{0x29292929U, 0x29292929U, 0x29292929U, 0x29292929U}}; -static const union __u128i ff02 = {{0x02020202U, 0x02020202U, 0x02020202U, 0x02020202U}}; -static const union __u128i ff04 = {{0x04040404U, 0x04040404U, 0x04040404U, 0x04040404U}}; -static const union __u128i ff10 = {{0x10101010U, 0x10101010U, 0x10101010U, 0x10101010U}}; -static const union __u128i ff40 = {{0x40404040U, 0x40404040U, 0x40404040U, 0x40404040U}}; -static const union __u128i ff80 = {{0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U}}; - -typedef __m128i batch; -#define BYTES_PER_BATCH 16 -#define B_FFN_ALL_29() ff29.v -#define B_FFN_ALL_02() ff02.v -#define B_FFN_ALL_04() ff04.v -#define B_FFN_ALL_10() ff10.v -#define B_FFN_ALL_40() ff40.v -#define B_FFN_ALL_80() ff80.v - -#define B_FFAND(a,b) FFAND(a,b) -#define B_FFOR(a,b) FFOR(a,b) -#define B_FFXOR(a,b) FFXOR(a,b) -#define B_FFSH8L(a,n) _mm_slli_epi64((a),(n)) -#define B_FFSH8R(a,n) _mm_srli_epi64((a),(n)) - -#define M_EMPTY() - -#undef BEST_SPAN -#define BEST_SPAN 16 - -#undef XOR_BEST_BY -static inline void XOR_BEST_BY(unsigned char *d, unsigned 
char *s1, unsigned char *s2) -{ - __m128i vs1 = _mm_load_si128((__m128i*)s1); - __m128i vs2 = _mm_load_si128((__m128i*)s2); - vs1 = _mm_xor_si128(vs1, vs2); - _mm_store_si128((__m128i*)d, vs1); -} - -#include "fftable.h" diff --git a/src/descrambler/ffdecsa/parallel_generic.h b/src/descrambler/ffdecsa/parallel_generic.h deleted file mode 100644 index 8275993ebc..0000000000 --- a/src/descrambler/ffdecsa/parallel_generic.h +++ /dev/null @@ -1,102 +0,0 @@ -/* FFdecsa -- fast decsa algorithm - * - * Copyright (C) 2003-2004 fatih89r - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - - - -#if 0 -//// generics -#define COPY4BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ - *pd = *ps; }while(0) -#define COPY8BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ - *pd = *ps; }while(0) -#define COPY16BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ - *pd = *ps; \ - *(pd+1) = *(ps+1); }while(0) -#define COPY32BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ - *pd = *ps; \ - *(pd+1) = *(ps+1) \ - *(pd+2) = *(ps+2) \ - *(pd+3) = *(ps+3); }while(0) -#define XOR4BY(d,s1,s2) do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \ - *pd = *ps1 ^ *ps2; }while(0) -#define XOR8BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \ - *pd = *ps1 ^ *ps2; }while(0) -#define XOR16BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \ - *pd = *ps1 ^ *ps2; \ - *(pd+8) = *(ps1+8) ^ *(ps2+8); }while(0) -#define XOR32BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \ - *pd = *ps1 ^ *ps2; \ - *(pd+1) = *(ps1+1) ^ *(ps2+1); \ - *(pd+2) = *(ps1+2) ^ *(ps2+2); \ - *(pd+3) = *(ps1+3) ^ *(ps2+3); }while(0) -#define XOR32BV(d,s1,s2) do{ int *const pd=(int *const)(d), *ps1=(const int *const)(s1), *ps2=(const int *const)(s2); \ - int z; \ - for(z=0;z<8;z++){ \ - pd[z]=ps1[z]^ps2[z]; \ - } \ - }while(0) -#define XOREQ4BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ - *pd ^= *ps; }while(0) -#define XOREQ8BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ - *pd ^= *ps; }while(0) -#define XOREQ16BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ - *pd ^= *ps; \ - *(pd+1) ^=*(ps+1); }while(0) -#define XOREQ32BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ - *pd ^= *ps; \ - *(pd+1) ^=*(ps+1); \ - *(pd+2) 
^=*(ps+2); \ - *(pd+3) ^=*(ps+3); }while(0) -#define XOREQ32BY4(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ - *pd ^= *ps; \ - *(pd+1) ^=*(ps+1); \ - *(pd+2) ^=*(ps+2); \ - *(pd+3) ^=*(ps+3); \ - *(pd+4) ^=*(ps+4); \ - *(pd+5) ^=*(ps+5); \ - *(pd+6) ^=*(ps+6); \ - *(pd+7) ^=*(ps+7); }while(0) -#define XOREQ32BV(d,s) do{ unsigned char *pd=(unsigned char *)(d), *ps=(unsigned char *)(s); \ - int z; \ - for(z=0;z<32;z++){ \ - pd[z]^=ps[z]; \ - } \ - }while(0) - -#else -#define XOR_4_BY(d,s1,s2) do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \ - *pd = *ps1 ^ *ps2; }while(0) -#define XOR_8_BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \ - *pd = *ps1 ^ *ps2; }while(0) -#define XOREQ_4_BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ - *pd ^= *ps; }while(0) -#define XOREQ_8_BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ - *pd ^= *ps; }while(0) -#define COPY_4_BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \ - *pd = *ps; }while(0) -#define COPY_8_BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \ - *pd = *ps; }while(0) - -#define BEST_SPAN 8 -#define XOR_BEST_BY(d,s1,s2) do{ XOR_8_BY(d,s1,s2); }while(0); -#define XOREQ_BEST_BY(d,s) do{ XOREQ_8_BY(d,s); }while(0); -#define COPY_BEST_BY(d,s) do{ COPY_8_BY(d,s); }while(0); - -#define END_MM do{ }while(0); -#endif diff --git a/src/descrambler/ffdecsa/parallel_std_def.h b/src/descrambler/ffdecsa/parallel_std_def.h deleted file mode 100644 index 701a130873..0000000000 --- a/src/descrambler/ffdecsa/parallel_std_def.h +++ /dev/null @@ -1,29 +0,0 @@ -/* FFdecsa -- fast decsa algorithm - * - * Copyright (C) 2003-2004 fatih89r - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#define FFXOR(a,b) ((a)^(b)) -#define FFAND(a,b) ((a)&(b)) -#define FFOR(a,b) ((a)|(b)) -#define FFNOT(a) (~(a)) - -#define B_FFAND(a,b) ((a)&(b)) -#define B_FFOR(a,b) ((a)|(b)) -#define B_FFXOR(a,b) ((a)^(b)) -#define B_FFSH8L(a,n) ((a)<<(n)) -#define B_FFSH8R(a,n) ((a)>>(n)) diff --git a/src/descrambler/ffdecsa/stream.c b/src/descrambler/ffdecsa/stream.c deleted file mode 100644 index f10dd8e899..0000000000 --- a/src/descrambler/ffdecsa/stream.c +++ /dev/null @@ -1,906 +0,0 @@ -/* FFdecsa -- fast decsa algorithm - * - * Copyright (C) 2003-2004 fatih89r - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - - - -// define statics only once, when STREAM_INIT -#ifdef STREAM_INIT -struct stream_regs { - group A[32+10][4]; // 32 because we will move back (virtual shift register) - group B[32+10][4]; // 32 because we will move back (virtual shift register) - group X[4]; - group Y[4]; - group Z[4]; - group D[4]; - group E[4]; - group F[4]; - group p; - group q; - group r; - }; - -static inline void trasp64_32_88ccw(unsigned char *data){ -/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/ -#define row ((unsigned int *)data) - int i,j; - for(j=0;j<64;j+=32){ - unsigned int t,b; - for(i=0;i<16;i++){ - t=row[j+i]; - b=row[j+16+i]; - row[j+i] = (t&0x0000ffff) | ((b )<<16); - row[j+16+i]=((t )>>16) | (b&0xffff0000) ; - } - } - for(j=0;j<64;j+=16){ - unsigned int t,b; - for(i=0;i<8;i++){ - t=row[j+i]; - b=row[j+8+i]; - row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8); - row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00); - } - } - for(j=0;j<64;j+=8){ - unsigned int t,b; - for(i=0;i<4;i++){ - t=row[j+i]; - b=row[j+4+i]; - row[j+i] =((t&0x0f0f0f0f)<<4) | (b&0x0f0f0f0f); - row[j+4+i] = (t&0xf0f0f0f0) | ((b&0xf0f0f0f0)>>4); - } - } - for(j=0;j<64;j+=4){ - unsigned int t,b; - for(i=0;i<2;i++){ - t=row[j+i]; - b=row[j+2+i]; - row[j+i] =((t&0x33333333)<<2) | (b&0x33333333); - row[j+2+i] = (t&0xcccccccc) | ((b&0xcccccccc)>>2); - } - } - for(j=0;j<64;j+=2){ - unsigned int t,b; - for(i=0;i<1;i++){ - t=row[j+i]; - b=row[j+1+i]; - row[j+i] =((t&0x55555555)<<1) | (b&0x55555555); - row[j+1+i] = (t&0xaaaaaaaa) | ((b&0xaaaaaaaa)>>1); - } - } -#undef row -} - -static inline void trasp64_32_88cw(unsigned char *data){ -/* 64 rows of 32 bits transposition (bytes transp. 
- 8x8 rotate clockwise)*/ -#define row ((unsigned int *)data) - int i,j; - for(j=0;j<64;j+=32){ - unsigned int t,b; - for(i=0;i<16;i++){ - t=row[j+i]; - b=row[j+16+i]; - row[j+i] = (t&0x0000ffff) | ((b )<<16); - row[j+16+i]=((t )>>16) | (b&0xffff0000) ; - } - } - for(j=0;j<64;j+=16){ - unsigned int t,b; - for(i=0;i<8;i++){ - t=row[j+i]; - b=row[j+8+i]; - row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8); - row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00); - } - } - for(j=0;j<64;j+=8){ - unsigned int t,b; - for(i=0;i<4;i++){ - t=row[j+i]; - b=row[j+4+i]; - row[j+i] =((t&0xf0f0f0f0)>>4) | (b&0xf0f0f0f0); - row[j+4+i]= (t&0x0f0f0f0f) | ((b&0x0f0f0f0f)<<4); - } - } - for(j=0;j<64;j+=4){ - unsigned int t,b; - for(i=0;i<2;i++){ - t=row[j+i]; - b=row[j+2+i]; - row[j+i] =((t&0xcccccccc)>>2) | (b&0xcccccccc); - row[j+2+i]= (t&0x33333333) | ((b&0x33333333)<<2); - } - } - for(j=0;j<64;j+=2){ - unsigned int t,b; - for(i=0;i<1;i++){ - t=row[j+i]; - b=row[j+1+i]; - row[j+i] =((t&0xaaaaaaaa)>>1) | (b&0xaaaaaaaa); - row[j+1+i]= (t&0x55555555) | ((b&0x55555555)<<1); - } - } -#undef row -} - -//64-64---------------------------------------------------------- -static inline void trasp64_64_88ccw(unsigned char *data){ -/* 64 rows of 64 bits transposition (bytes transp. 
- 8x8 rotate counterclockwise)*/ -#define row ((unsigned long long int *)data) - int i,j; - for(j=0;j<64;j+=64){ - unsigned long long int t,b; - for(i=0;i<32;i++){ - t=row[j+i]; - b=row[j+32+i]; - row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32); - row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ; - } - } - for(j=0;j<64;j+=32){ - unsigned long long int t,b; - for(i=0;i<16;i++){ - t=row[j+i]; - b=row[j+16+i]; - row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); - row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; - } - } - for(j=0;j<64;j+=16){ - unsigned long long int t,b; - for(i=0;i<8;i++){ - t=row[j+i]; - b=row[j+8+i]; - row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); - row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); - } - } - for(j=0;j<64;j+=8){ - unsigned long long int t,b; - for(i=0;i<4;i++){ - t=row[j+i]; - b=row[j+4+i]; - row[j+i] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL); - row[j+4+i] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4); - } - } - for(j=0;j<64;j+=4){ - unsigned long long int t,b; - for(i=0;i<2;i++){ - t=row[j+i]; - b=row[j+2+i]; - row[j+i] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL); - row[j+2+i] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2); - } - } - for(j=0;j<64;j+=2){ - unsigned long long int t,b; - for(i=0;i<1;i++){ - t=row[j+i]; - b=row[j+1+i]; - row[j+i] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL); - row[j+1+i] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1); - } - } -#undef row -} - -static inline void trasp64_64_88cw(unsigned char *data){ -/* 64 rows of 64 bits transposition (bytes transp. 
- 8x8 rotate clockwise)*/ -#define row ((unsigned long long int *)data) - int i,j; - for(j=0;j<64;j+=64){ - unsigned long long int t,b; - for(i=0;i<32;i++){ - t=row[j+i]; - b=row[j+32+i]; - row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32); - row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ; - } - } - for(j=0;j<64;j+=32){ - unsigned long long int t,b; - for(i=0;i<16;i++){ - t=row[j+i]; - b=row[j+16+i]; - row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); - row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; - } - } - for(j=0;j<64;j+=16){ - unsigned long long int t,b; - for(i=0;i<8;i++){ - t=row[j+i]; - b=row[j+8+i]; - row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); - row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); - } - } - for(j=0;j<64;j+=8){ - unsigned long long int t,b; - for(i=0;i<4;i++){ - t=row[j+i]; - b=row[j+4+i]; - row[j+i] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL); - row[j+4+i] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4); - } - } - for(j=0;j<64;j+=4){ - unsigned long long int t,b; - for(i=0;i<2;i++){ - t=row[j+i]; - b=row[j+2+i]; - row[j+i] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL); - row[j+2+i] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2); - } - } - for(j=0;j<64;j+=2){ - unsigned long long int t,b; - for(i=0;i<1;i++){ - t=row[j+i]; - b=row[j+1+i]; - row[j+i] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL); - row[j+1+i] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1); - } - } -#undef row -} - -//64-128---------------------------------------------------------- -static inline void trasp64_128_88ccw(unsigned char *data){ -/* 64 rows of 128 bits transposition (bytes transp. 
- 8x8 rotate counterclockwise)*/ -#define halfrow ((unsigned long long int *)data) - int i,j; - for(j=0;j<64;j+=64){ - unsigned long long int t,b; - for(i=0;i<32;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+32+i)]; - halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32); - halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ; - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+32+i)+1]; - halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32); - halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ; - } - } - for(j=0;j<64;j+=32){ - unsigned long long int t,b; - for(i=0;i<16;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+16+i)]; - halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); - halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+16+i)+1]; - halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); - halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; - } - } - for(j=0;j<64;j+=16){ - unsigned long long int t,b; - for(i=0;i<8;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+8+i)]; - halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); - halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+8+i)+1]; - halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); - halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); - } - } - for(j=0;j<64;j+=8){ - unsigned long long int t,b; - for(i=0;i<4;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+4+i)]; - halfrow[2*(j+i)] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL); - halfrow[2*(j+4+i)] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4); - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+4+i)+1]; - halfrow[2*(j+i)+1] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL); - halfrow[2*(j+4+i)+1] = 
(t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4); - } - } - for(j=0;j<64;j+=4){ - unsigned long long int t,b; - for(i=0;i<2;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+2+i)]; - halfrow[2*(j+i)] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL); - halfrow[2*(j+2+i)] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2); - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+2+i)+1]; - halfrow[2*(j+i)+1] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL); - halfrow[2*(j+2+i)+1] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2); - } - } - for(j=0;j<64;j+=2){ - unsigned long long int t,b; - for(i=0;i<1;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+1+i)]; - halfrow[2*(j+i)] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL); - halfrow[2*(j+1+i)] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1); - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+1+i)+1]; - halfrow[2*(j+i)+1] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL); - halfrow[2*(j+1+i)+1] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1); - } - } -#undef halfrow -} - -static inline void trasp64_128_88cw(unsigned char *data){ -/* 64 rows of 128 bits transposition (bytes transp. 
- 8x8 rotate clockwise)*/ -#define halfrow ((unsigned long long int *)data) - int i,j; - for(j=0;j<64;j+=64){ - unsigned long long int t,b; - for(i=0;i<32;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+32+i)]; - halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32); - halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ; - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+32+i)+1]; - halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32); - halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ; - } - } - for(j=0;j<64;j+=32){ - unsigned long long int t,b; - for(i=0;i<16;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+16+i)]; - halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); - halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+16+i)+1]; - halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16); - halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ; - } - } - for(j=0;j<64;j+=16){ - unsigned long long int t,b; - for(i=0;i<8;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+8+i)]; - halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); - halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+8+i)+1]; - halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8); - halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL); - } - } - for(j=0;j<64;j+=8){ - unsigned long long int t,b; - for(i=0;i<4;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+4+i)]; - halfrow[2*(j+i)] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL); - halfrow[2*(j+4+i)] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4); - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+4+i)+1]; - halfrow[2*(j+i)+1] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL); - halfrow[2*(j+4+i)+1] = 
(t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4); - } - } - for(j=0;j<64;j+=4){ - unsigned long long int t,b; - for(i=0;i<2;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+2+i)]; - halfrow[2*(j+i)] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL); - halfrow[2*(j+2+i)] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2); - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+2+i)+1]; - halfrow[2*(j+i)+1] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL); - halfrow[2*(j+2+i)+1] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2); - } - } - for(j=0;j<64;j+=2){ - unsigned long long int t,b; - for(i=0;i<1;i++){ - t=halfrow[2*(j+i)]; - b=halfrow[2*(j+1+i)]; - halfrow[2*(j+i)] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL); - halfrow[2*(j+1+i)] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1); - t=halfrow[2*(j+i)+1]; - b=halfrow[2*(j+1+i)+1]; - halfrow[2*(j+i)+1] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL); - halfrow[2*(j+1+i)+1] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1); - } - } -#undef halfrow -} -#endif - - -#ifdef STREAM_INIT -static void stream_cypher_group_init( - struct stream_regs *regs, - group iA[8][4], // [In] iA00,iA01,...iA73 32 groups | Derived from key. - group iB[8][4], // [In] iB00,iB01,...iB73 32 groups | Derived from key. - unsigned char *sb) // [In] (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input. -#endif -#ifdef STREAM_NORMAL -static void stream_cypher_group_normal( - struct stream_regs *regs, - unsigned char *cb) // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output. 
-#endif -{ -#ifdef STREAM_INIT - group in1[4]; - group in2[4]; -#endif - group extra_B[4]; - group fa,fb,fc,fd,fe; - group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b; - group next_E[4]; - group tmp0,tmp1,tmp2,tmp3,tmp4; -#ifdef STREAM_INIT - group *sb_g=(group *)sb; -#endif -#ifdef STREAM_NORMAL - group *cb_g=(group *)cb; -#endif - int aboff; - int i,j,k,b; - int dbg; - -#ifdef STREAM_INIT - DBG(fprintf(stderr,":::::::::: BEGIN STREAM INIT\n")); -#endif -#ifdef STREAM_NORMAL - DBG(fprintf(stderr,":::::::::: BEGIN STREAM NORMAL\n")); -#endif -#ifdef STREAM_INIT -for(j=0;j<64;j++){ - DBG(fprintf(stderr,"precall prerot stream_in[%2i]=",j)); - DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG)); -} - -DBG(dump_mem("stream_prerot ",sb,GROUP_PARALLELISM*8,BYPG)); -#if GROUP_PARALLELISM==32 -trasp64_32_88ccw(sb); -#endif -#if GROUP_PARALLELISM==64 -trasp64_64_88ccw(sb); -#endif -#if GROUP_PARALLELISM==128 -trasp64_128_88ccw(sb); -#endif -DBG(dump_mem("stream_postrot",sb,GROUP_PARALLELISM*8,BYPG)); - -for(j=0;j<64;j++){ - DBG(fprintf(stderr,"precall stream_in[%2i]=",j)); - DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG)); -} -#endif - - aboff=32; - -#ifdef STREAM_INIT - // load first 32 bits of ck into A[aboff+0]..A[aboff+7] - // load last 32 bits of ck into B[aboff+0]..B[aboff+7] - // all other regs = 0 - for(i=0;i<8;i++){ - for(b=0;b<4;b++){ -DBG(fprintf(stderr,"dbg from iA A[%i][%i]=",i,b)); -DBG(dump_mem("",(unsigned char *)&iA[i][b],BYPG,BYPG)); -DBG(fprintf(stderr," dbg from iB B[%i][%i]=",i,b)); -DBG(dump_mem("",(unsigned char *)&iB[i][b],BYPG,BYPG)); - regs->A[aboff+i][b]=iA[i][b]; - regs->B[aboff+i][b]=iB[i][b]; - } - } - for(b=0;b<4;b++){ - regs->A[aboff+8][b]=FF0(); - regs->A[aboff+9][b]=FF0(); - regs->B[aboff+8][b]=FF0(); - regs->B[aboff+9][b]=FF0(); - } - for(b=0;b<4;b++){ - regs->X[b]=FF0(); - regs->Y[b]=FF0(); - regs->Z[b]=FF0(); - regs->D[b]=FF0(); - regs->E[b]=FF0(); - regs->F[b]=FF0(); - } - regs->p=FF0(); - regs->q=FF0(); - regs->r=FF0(); -#endif - 
-for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"dbg A0[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)&regs->A[aboff+0][dbg],BYPG,BYPG)); - DBG(fprintf(stderr,"dbg B0[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)&regs->B[aboff+0][dbg],BYPG,BYPG)); -} - -//////////////////////////////////////////////////////////////////////////////// - - // EXTERNAL LOOP - 8 bytes per operation - for(i=0;i<8;i++){ - - DBG(fprintf(stderr,"--BEGIN EXTERNAL LOOP %i\n",i)); - -#ifdef STREAM_INIT - for(b=0;b<4;b++){ - in1[b]=sb_g[8*i+4+b]; - in2[b]=sb_g[8*i+b]; - } -#endif - - // INTERNAL LOOP - 2 bits per iteration - for(j=0; j<4; j++){ - - DBG(fprintf(stderr,"---BEGIN INTERNAL LOOP %i (EXT %i, INT %i)\n",j,i,j)); - - // from A0..A9, 35 bits are selected as inputs to 7 s-boxes - // 5 bits input per s-box, 2 bits output per s-box - - // we can select bits with zero masking and shifting operations - // and synthetize s-boxes with optimized boolean functions. - // this is the actual reason we do all the crazy transposition - // stuff to switch between normal and bit slice representations. - // this code really flies. 
- - fe=regs->A[aboff+3][0];fa=regs->A[aboff+0][2];fb=regs->A[aboff+5][1];fc=regs->A[aboff+6][3];fd=regs->A[aboff+8][0]; -/* 1000 1110 1110 0001 : lev 7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) ); -/* 1110 0010 0011 0011 : lev 6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) ); -/* 0011 0110 1000 1101 : lev 5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) ); -/* 0101 0101 1001 0011 : lev 5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) ); -/* 1000 1110 1110 0001 : lev 7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1()))); -/* 1110 0010 0011 0011 : lev 6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1())); -/* 0011 0110 1000 1101 : lev 5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc))); -/* 0101 0101 1001 0011 : lev 5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd))); - s1a=FFXOR(tmp0,FFAND(fe,tmp1)); - s1b=FFXOR(tmp2,FFAND(fe,tmp3)); -//dump_mem("s1as1b-fe",&fe,BYPG,BYPG); -//dump_mem("s1as1b-fa",&fa,BYPG,BYPG); -//dump_mem("s1as1b-fb",&fb,BYPG,BYPG); -//dump_mem("s1as1b-fc",&fc,BYPG,BYPG); -//dump_mem("s1as1b-fd",&fd,BYPG,BYPG); - - fe=regs->A[aboff+1][1];fa=regs->A[aboff+2][2];fb=regs->A[aboff+5][3];fc=regs->A[aboff+6][0];fd=regs->A[aboff+8][1]; -/* 1001 1110 0110 0001 : lev 6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) ); -/* 0000 0011 0111 1011 : lev 5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) ); -/* 1100 0110 1101 0010 : lev 6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) ); -/* 0001 1110 1111 0101 : lev 5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) ); -/* 1001 1110 0110 0001 : lev 6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1())))); -/* 0000 0011 0111 1011 : lev 5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc)); -/* 1100 0110 1101 0010 : lev 6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1())))); -/* 0001 1110 1111 0101 : lev 5: */ 
tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd)))); - s2a=FFXOR(tmp0,FFAND(fe,tmp1)); - s2b=FFXOR(tmp2,FFAND(fe,tmp3)); - - fe=regs->A[aboff+0][3];fa=regs->A[aboff+1][0];fb=regs->A[aboff+4][1];fc=regs->A[aboff+4][3];fd=regs->A[aboff+5][2]; -/* 0100 1011 1001 0110 : lev 5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) ); -/* 1101 0101 1000 1100 : lev 7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) ); -/* 0010 0111 1101 1000 : lev 4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) ); -/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES; -/* 0100 1011 1001 0110 : lev 5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd))); -/* 1101 0101 1000 1100 : lev 7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1())))); -/* 0010 0111 1101 1000 : lev 4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc)); -/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1(); - s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1)); - s3b=FFXOR(tmp2,FFAND(fe,tmp3)); - - fe=regs->A[aboff+2][3];fa=regs->A[aboff+0][1];fb=regs->A[aboff+1][3];fc=regs->A[aboff+3][2];fd=regs->A[aboff+7][0]; -/* 1011 0101 0100 1001 : lev 7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) ); -/* 0010 1101 0110 0110 : lev 6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) ); -/* 0110 0111 1101 0000 : lev 7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) ); -/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES; -/* 1011 0101 0100 1001 : lev 7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1()))))); -/* 0010 1101 0110 0110 : lev 6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc))); -/* 0110 0111 1101 0000 : lev 7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd))); -/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1(); - s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0))); - s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3)); - - 
fe=regs->A[aboff+4][2];fa=regs->A[aboff+3][3];fb=regs->A[aboff+5][0];fc=regs->A[aboff+7][1];fd=regs->A[aboff+8][2]; -/* 1000 1111 0011 0010 : lev 7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) ); -/* 0110 1011 0000 1011 : lev 6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) ); -/* 0001 1010 0111 1001 : lev 6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) ); -/* 0101 1101 1101 0101 : lev 4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd ); -/* 1000 1111 0011 0010 : lev 7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1())); -/* 0110 1011 0000 1011 : lev 6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd))))); -/* 0001 1010 0111 1001 : lev 6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd))); -/* 0101 1101 1101 0101 : lev 4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd); - s5a=FFXOR(tmp0,FFAND(fe,tmp1)); - s5b=FFXOR(tmp2,FFAND(fe,tmp3)); - - fe=regs->A[aboff+2][1];fa=regs->A[aboff+3][1];fb=regs->A[aboff+4][0];fc=regs->A[aboff+6][2];fd=regs->A[aboff+8][3]; -/* 0011 0110 0010 1101 : lev 6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) ); -/* 1110 1110 1011 1011 : lev 3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES ); -/* 0101 1000 0110 0111 : lev 6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) ); -/* 0001 0011 0000 0001 : lev 5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) ); -/* 0011 0110 0010 1101 : lev 6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc)); -/* 1110 1110 1011 1011 : lev 3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1()); -/* 0101 1000 0110 0111 : lev 6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd))); -/* 0001 0011 0000 0001 : lev 5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd))); - s6a=FFXOR(tmp0,FFAND(fe,tmp1)); - s6b=FFXOR(tmp2,FFAND(fe,tmp3)); - - fe=regs->A[aboff+1][2];fa=regs->A[aboff+2][0];fb=regs->A[aboff+6][1];fc=regs->A[aboff+7][2];fd=regs->A[aboff+7][3]; -/* 0111 1000 1001 0110 
: lev 5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) ); -/* 0100 1001 0101 1011 : lev 6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) ); -/* 0100 1001 1011 1001 : lev 5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) ); -/* 1111 1111 1101 1101 : lev 3: */ //tmp3=( fd|( ( fa&fc )^ALL_ONES ) ); -/* 0111 1000 1001 0110 : lev 5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd)))); -/* 0100 1001 0101 1011 : lev 6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd)))); -/* 0100 1001 1011 1001 : lev 5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd)); -/* 1111 1111 1101 1101 : lev 3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1())); - s7a=FFXOR(tmp0,FFAND(fe,tmp1)); - s7b=FFXOR(tmp2,FFAND(fe,tmp3)); - - -/* - we have just done this: - - int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0}; - int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1}; - int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1}; - int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1}; - int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2}; - int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0}; - int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2}; - - s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ] - |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ]; - s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ] - |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ]; - s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ] - |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | 
(((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ]; - s7 = sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ]; -*/ - - // use 4x4 xor to produce extra nibble for T3 - - extra_B[3]=FFXOR(FFXOR(FFXOR(regs->B[aboff+2][0],regs->B[aboff+5][1]),regs->B[aboff+6][2]),regs->B[aboff+8][3]); - extra_B[2]=FFXOR(FFXOR(FFXOR(regs->B[aboff+5][0],regs->B[aboff+7][1]),regs->B[aboff+2][3]),regs->B[aboff+3][2]); - extra_B[1]=FFXOR(FFXOR(FFXOR(regs->B[aboff+4][3],regs->B[aboff+7][2]),regs->B[aboff+3][0]),regs->B[aboff+4][1]); - extra_B[0]=FFXOR(FFXOR(FFXOR(regs->B[aboff+8][2],regs->B[aboff+5][3]),regs->B[aboff+2][1]),regs->B[aboff+7][0]); -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"extra_B[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)&extra_B[dbg],BYPG,BYPG)); -} - - // T1 = xor all inputs - // in1, in2, D are only used in T1 during initialisation, not generation - for(b=0;b<4;b++){ - regs->A[aboff-1][b]=FFXOR(regs->A[aboff+9][b],regs->X[b]); - } - -#ifdef STREAM_INIT - for(b=0;b<4;b++){ - regs->A[aboff-1][b]=FFXOR(FFXOR(regs->A[aboff-1][b],regs->D[b]),((j % 2) ? in2[b] : in1[b])); - } -#endif - -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"next_A0[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)&regs->A[aboff-1][dbg],BYPG,BYPG)); -} - - // T2 = xor all inputs - // in1, in2 are only used in T1 during initialisation, not generation - // if p=0, use this, if p=1, rotate the result left - for(b=0;b<4;b++){ - regs->B[aboff-1][b]=FFXOR(FFXOR(regs->B[aboff+6][b],regs->B[aboff+9][b]),regs->Y[b]); - } - -#ifdef STREAM_INIT - for(b=0;b<4;b++){ - regs->B[aboff-1][b]=FFXOR(regs->B[aboff-1][b],((j % 2) ? 
in1[b] : in2[b])); - } -#endif - -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"next_B0[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)®s->B[aboff-1][dbg],BYPG,BYPG)); -} - - // if p=1, rotate left (yes, this is what we're doing) - tmp3=regs->B[aboff-1][3]; - regs->B[aboff-1][3]=FFXOR(regs->B[aboff-1][3],FFAND(FFXOR(regs->B[aboff-1][3],regs->B[aboff-1][2]),regs->p)); - regs->B[aboff-1][2]=FFXOR(regs->B[aboff-1][2],FFAND(FFXOR(regs->B[aboff-1][2],regs->B[aboff-1][1]),regs->p)); - regs->B[aboff-1][1]=FFXOR(regs->B[aboff-1][1],FFAND(FFXOR(regs->B[aboff-1][1],regs->B[aboff-1][0]),regs->p)); - regs->B[aboff-1][0]=FFXOR(regs->B[aboff-1][0],FFAND(FFXOR(regs->B[aboff-1][0],tmp3),regs->p)); - -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"next_B0[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)®s->B[aboff-1][dbg],BYPG,BYPG)); -} - - // T3 = xor all inputs - for(b=0;b<4;b++){ - regs->D[b]=FFXOR(FFXOR(regs->E[b],regs->Z[b]),extra_B[b]); - } - -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"D[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)®s->D[dbg],BYPG,BYPG)); -} - - // T4 = sum, carry of Z + E + r - for(b=0;b<4;b++){ - next_E[b]=regs->F[b]; - } - - tmp0=FFXOR(regs->Z[0],regs->E[0]); - tmp1=FFAND(regs->Z[0],regs->E[0]); - regs->F[0]=FFXOR(regs->E[0],FFAND(regs->q,FFXOR(regs->Z[0],regs->r))); - tmp3=FFAND(tmp0,regs->r); - tmp4=FFOR(tmp1,tmp3); - - tmp0=FFXOR(regs->Z[1],regs->E[1]); - tmp1=FFAND(regs->Z[1],regs->E[1]); - regs->F[1]=FFXOR(regs->E[1],FFAND(regs->q,FFXOR(regs->Z[1],tmp4))); - tmp3=FFAND(tmp0,tmp4); - tmp4=FFOR(tmp1,tmp3); - - tmp0=FFXOR(regs->Z[2],regs->E[2]); - tmp1=FFAND(regs->Z[2],regs->E[2]); - regs->F[2]=FFXOR(regs->E[2],FFAND(regs->q,FFXOR(regs->Z[2],tmp4))); - tmp3=FFAND(tmp0,tmp4); - tmp4=FFOR(tmp1,tmp3); - - tmp0=FFXOR(regs->Z[3],regs->E[3]); - tmp1=FFAND(regs->Z[3],regs->E[3]); - regs->F[3]=FFXOR(regs->E[3],FFAND(regs->q,FFXOR(regs->Z[3],tmp4))); - tmp3=FFAND(tmp0,tmp4); - regs->r=FFXOR(regs->r,FFAND(regs->q,FFXOR(FFOR(tmp1,tmp3),regs->r))); // 
ultimate carry - -/* - we have just done this: (believe it or not) - - if (q) { - F = Z + E + r; - r = (F >> 4) & 1; - F = F & 0x0f; - } - else { - F = E; - } -*/ - for(b=0;b<4;b++){ - regs->E[b]=next_E[b]; - } -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"F[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)®s->F[dbg],BYPG,BYPG)); -} -DBG(fprintf(stderr,"r=")); -DBG(dump_mem("",(unsigned char *)®s->r,BYPG,BYPG)); -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"E[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)®s->E[dbg],BYPG,BYPG)); -} - - // this simple instruction is virtually shifting all the shift registers - aboff--; - -/* - we've just done this: - - A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0; - B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0; -*/ - - regs->X[0]=s1a; - regs->X[1]=s2a; - regs->X[2]=s3b; - regs->X[3]=s4b; - regs->Y[0]=s3a; - regs->Y[1]=s4a; - regs->Y[2]=s5b; - regs->Y[3]=s6b; - regs->Z[0]=s5a; - regs->Z[1]=s6a; - regs->Z[2]=s1b; - regs->Z[3]=s2b; - regs->p=s7a; - regs->q=s7b; -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"X[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)®s->X[dbg],BYPG,BYPG)); -} -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"Y[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)®s->Y[dbg],BYPG,BYPG)); -} -for(dbg=0;dbg<4;dbg++){ - DBG(fprintf(stderr,"Z[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)®s->Z[dbg],BYPG,BYPG)); -} -DBG(fprintf(stderr,"p=")); -DBG(dump_mem("",(unsigned char *)®s->p,BYPG,BYPG)); -DBG(fprintf(stderr,"q=")); -DBG(dump_mem("",(unsigned char *)®s->q,BYPG,BYPG)); - -#ifdef STREAM_NORMAL - // require 4 loops per output byte - // 2 output bits are a function of the 4 bits of D - // xor 2 by 2 - cb_g[8*i+7-2*j]=FFXOR(regs->D[2],regs->D[3]); - cb_g[8*i+6-2*j]=FFXOR(regs->D[0],regs->D[1]); -for(dbg=0;dbg<8;dbg++){ - DBG(fprintf(stderr,"op[%i]=",dbg)); - DBG(dump_mem("",(unsigned char *)&cb_g[8*i+dbg],BYPG,BYPG)); -} -#endif - -DBG(fprintf(stderr,"---END INTERNAL LOOP\n")); - - 
} // INTERNAL LOOP - -DBG(fprintf(stderr,"--END EXTERNAL LOOP\n")); - - } // EXTERNAL LOOP - - // move 32 steps forward, ready for next call - for(k=0;k<10;k++){ - for(b=0;b<4;b++){ -DBG(fprintf(stderr,"moving forward AB k=%i b=%i\n",k,b)); - regs->A[32+k][b]=regs->A[k][b]; - regs->B[32+k][b]=regs->B[k][b]; - } - } - - -//////////////////////////////////////////////////////////////////////////////// - -#ifdef STREAM_NORMAL -for(j=0;j<64;j++){ - DBG(fprintf(stderr,"postcall prerot cb[%2i]=",j)); - DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG)); -} - -#if GROUP_PARALLELISM==32 -trasp64_32_88cw(cb); -#endif -#if GROUP_PARALLELISM==64 -trasp64_64_88cw(cb); -#endif -#if GROUP_PARALLELISM==128 -trasp64_128_88cw(cb); -#endif - -for(j=0;j<64;j++){ - DBG(fprintf(stderr,"postcall postrot cb[%2i]=",j)); - DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG)); -} -#endif - -#ifdef STREAM_INIT - DBG(fprintf(stderr,":::::::::: END STREAM INIT\n")); -#endif -#ifdef STREAM_NORMAL - DBG(fprintf(stderr,":::::::::: END STREAM NORMAL\n")); -#endif - -} -