False error when opening decrypted document with Crypto library of Python 2.7.9

0

I tested the following script in Python, using Python 2.7.9, with some adaptations made by me, available at link :

# Cifra e decifra documentos nos formatos .pdf, .docx e .rtf
# Adaptacao de Marcelo Ferreira Zochio

from Crypto import Random
from Crypto.Cipher import AES
import hashlib

def pad(s):
    padding_size = AES.block_size - len(s) % AES.block_size
    return s + b"
# Cifra e decifra documentos nos formatos .pdf, .docx e .rtf
# Adaptacao de Marcelo Ferreira Zochio

from Crypto import Random
from Crypto.Cipher import AES
import hashlib

def pad(s):
    """Pad ``s`` with NUL bytes up to the next multiple of the AES block size.

    Returns a ``(padded, padding_size)`` tuple. ``padding_size`` is always
    between 1 and ``AES.block_size``: input whose length is already a
    multiple of the block size still receives one full block of padding.
    """
    padding_size = AES.block_size - len(s) % AES.block_size
    # Exactly one filler byte per padding position, so that
    # len(padded) % AES.block_size == 0 and decrypt() can strip
    # ``padding_size`` bytes again.
    return s + b"\0" * padding_size, padding_size


def encrypt(message, key, key_size=256):
    """Encrypt ``message`` with AES in CFB mode under ``key``.

    The returned byte string is laid out as::

        IV (AES.block_size bytes) | ciphertext of padded message | 1 byte

    where the final byte holds the padding size reported by ``pad``.

    ``key_size`` is not used by the body; it is kept so existing callers
    that pass it keep working.
    """
    message, padding_size = pad(message)
    iv = Random.new().read(AES.block_size)
    cipher = AES.new(key, AES.MODE_CFB, iv)
    # On Python 2 ``bytes`` is ``str``, so ``bytes([n])`` would yield the
    # text "[n]". Going through bytearray gives one raw byte on 2 and 3.
    trailer = bytes(bytearray([padding_size]))
    return iv + cipher.encrypt(message) + trailer


def decrypt(ciphertext, key):
    """Reverse ``encrypt`` and return the original message bytes.

    Splits off the leading IV and the trailing padding-size byte, decrypts
    only the bytes in between, and removes the padding.

    Raises:
        ValueError: if ``ciphertext`` is too short to hold an IV plus the
            trailer, or if the trailer does not hold a padding size in
            ``1..AES.block_size`` (this includes data whose trailer was
            written as the text "[n]" by ``bytes([n])`` on Python 2).
    """
    if len(ciphertext) < AES.block_size + 1:
        raise ValueError(
            "ciphertext is too short to contain an IV and a padding trailer")
    iv = ciphertext[:AES.block_size]
    # bytearray indexing yields an int on both Python 2 and Python 3.
    padding_size = bytearray(ciphertext[-1:])[0]
    # The IV and the trailer are framing, not encrypted payload; running
    # them through the cipher would add junk bytes to the output.
    body = ciphertext[AES.block_size:-1]
    if not 1 <= padding_size <= AES.block_size or padding_size > len(body):
        raise ValueError("invalid padding size in ciphertext trailer")
    cipher = AES.new(key, AES.MODE_CFB, iv)
    plaintext = cipher.decrypt(body)
    # padding_size >= 1 here, so the negative slice never degenerates to
    # ``[:-0]`` (which would return an empty result).
    return plaintext[:-padding_size]

def encrypt_file(file_name, key):
    """Encrypt the file ``file_name`` and write the result to ``file_name + ".crp"``.

    The whole input file is read into memory before it is handed to
    ``encrypt``; the source file itself is left untouched.
    """
    target_name = file_name + ".crp"
    with open(file_name, 'rb') as source:
        contents = source.read()
    encrypted = encrypt(contents, key)
    with open(target_name, 'wb') as target:
        target.write(encrypted)

def decrypt_file(file_name, key):
    """Decrypt the file ``file_name`` into a sibling file prefixed ``decifrado_``.

    The whole input file is read into memory and passed to ``decrypt``.
    The output is written to the same directory as the input, with
    ``decifrado_`` prepended to the file's base name (for a bare file name
    this is simply ``'decifrado_' + file_name``).
    """
    import os  # local import: only needed to split the path

    with open(file_name, 'rb') as fo:
        ciphertext = fo.read()
    dec = decrypt(ciphertext, key)
    # Prefix the base name, not the whole path: 'dir/a.crp' must become
    # 'dir/decifrado_a.crp' rather than 'decifrado_dir/a.crp'.
    directory, base_name = os.path.split(file_name)
    out_name = os.path.join(directory, 'decifrado_' + base_name)
    with open(out_name, 'wb') as fo:
        fo.write(dec)

# The AES key handed to the cipher is the hexadecimal MD5 digest of this
# passphrase.
key = 'chave'
hash_object = hashlib.md5(key.encode())

# Maps the (upper-cased) user choice to the function that handles it.
_ACTIONS = {'EN': encrypt_file, 'DE': decrypt_file}

# Interactive loop: ask for a file and an operation, run it, and repeat
# until the user answers "N" to the continue prompt.
while True:
    filename = raw_input("Arquivo a ser trabalhado: ")
    en_de = raw_input("En (cifrar) ou De (decifrar)?")
    _action = _ACTIONS.get(en_de.upper())
    if _action is None:
        print("Escolher en ou de!")
    else:
        _action(filename, hash_object.hexdigest())

    cont = raw_input("Continuar?")
    if cont.upper() == 'N':
        break
" * padding_size, padding_size def encrypt(message, key, key_size=256): message, padding_size = pad(message) iv = Random.new().read(AES.block_size) cipher = AES.new(key, AES.MODE_CFB, iv) enc_bytes = iv + cipher.encrypt(message) + bytes([padding_size]) return enc_bytes def decrypt(ciphertext, key): iv = ciphertext[:AES.block_size] cipher = AES.new(key, AES.MODE_CFB, iv) plaintext = cipher.decrypt(ciphertext) return plaintext def encrypt_file(file_name, key): with open(file_name, 'rb') as fo: plaintext = fo.read() enc = encrypt(plaintext, key) with open(file_name + ".crp", 'wb') as fo: fo.write(enc) def decrypt_file(file_name, key): with open(file_name, 'rb') as fo: ciphertext = fo.read() dec = decrypt(ciphertext, key) with open('decifrado_' + file_name, 'wb') as fo: fo.write(dec) key = 'chave' hash_object = hashlib.md5(key.encode()) while True: filename = raw_input("Arquivo a ser trabalhado: ") en_de = raw_input("En (cifrar) ou De (decifrar)?") if en_de.upper() == 'EN': encrypt_file(filename, hash_object.hexdigest()) elif en_de.upper() == 'DE': decrypt_file(filename, hash_object.hexdigest()) else: print("Escolher en ou de!") cont = raw_input("Continuar?") if cont.upper() == 'N': break

It works perfectly; however, when opening decrypted .docx and .odt documents (after removing the .crp extension and leaving the original one), Windows warns that the document is corrupted and asks whether I want to recover it. If I choose yes, the document is recovered normally and I then just have to save it.

This does not happen with .pdf or .txt files. Does it have something to do with the character formatting used by Word or OpenOffice?

    
asked by anonymous 30.10.2017 / 18:37

1 answer

2

AES is a block cipher algorithm and works with fixed-size blocks of 16 bytes (128 bits), no more, no less.

This means that your implementation should take some important points into consideration:

  • Data smaller than the block size needs to be "padded" until it reaches the block size;

  • Data larger than the block size needs to be "fragmented" into chunks of the block size and, of course, "padded" when needed;

  • Blocks that were "padded" during the encryption operation need to be "truncated" (unpadding) during the decryption operation in order to recover the data in its original size;

  • When encrypting files, their original size must be stored together with the encrypted data to enable the unpadding of the last block during the decryption operation;

  • Encrypting and decrypting files should happen in chunks, avoiding storing the entire file in memory at once before being processed.

  • Your implementation violates these AES requirements, which is certainly the cause of the corruption of the original data.

    Another point is that your implementation writes the encrypted file to 16 bytes representation, this is not necessary, encrypted data can be written in binary format.

    Your implementation loads the file to be encrypted / decrypted completely into memory! This creates a limitation if the available memory is smaller than the file size.

    Based on this and this reference, here is a class that can encrypt and decrypt files and data correctly, without corrupting their content:

    import os
    import hashlib
    import base64
    import struct
    from Crypto.Cipher import AES
    from Crypto import Random
    
    chunksize = 64 * 1024
    BS = 16

    def pad(s):
        """Append padding so that ``len(result)`` is a multiple of ``BS``.

        Every padding position holds the padding length itself (between 1
        and ``BS``), so ``unpad`` can recover the original data by reading
        the last position. Accepts ``bytes`` as well as text; the filler
        matches the type of ``s``.
        """
        fill = BS - len(s) % BS
        if isinstance(s, bytes):
            # On Python 2 ``bytes`` is ``str`` and this equals chr(fill);
            # on Python 3 it avoids concatenating bytes with text.
            return s + bytes(bytearray([fill])) * fill
        return s + chr(fill) * fill

    def unpad(s):
        """Strip the padding added by ``pad``.

        The value of the last position of ``s`` is taken as the number of
        trailing positions to remove.
        """
        return s[:-ord(s[len(s) - 1:])]
    
    class AESCipher:
        """AES-CBC helper for short messages and for files.

        The 16-byte AES key is the first 16 bytes of the SHA-1 digest of
        the passphrase given to the constructor.

        NOTE(review): the output carries no integrity check (MAC), so
        tampering with the ciphertext is not detected on decryption.
        """

        def __init__(self, key):
            """Derive the AES key from the passphrase ``key``.

            ``key`` may be a byte string or text; text is encoded as UTF-8
            before hashing so that non-ASCII passphrases are accepted.
            """
            if not isinstance(key, bytes):
                key = key.encode('utf-8')
            keydigest = hashlib.sha1(key).digest()
            self.key = keydigest[:16]

        def encrypt(self, raw):
            """Encrypt ``raw`` and return ``base64(IV + ciphertext)``.

            ``raw`` is padded with ``pad`` and a fresh random IV is used
            for every call.
            """
            raw = pad(raw)
            iv = Random.new().read(AES.block_size)
            cipher = AES.new(self.key, AES.MODE_CBC, iv)
            return base64.b64encode(iv + cipher.encrypt(raw))

        def decrypt(self, enc):
            """Decrypt a value produced by ``encrypt`` and strip its padding."""
            enc = base64.b64decode(enc)
            iv = enc[:16]
            cipher = AES.new(self.key, AES.MODE_CBC, iv)
            return unpad(cipher.decrypt(enc[16:]))

        def encrypt_file(self, in_filename, out_filename):
            """Encrypt ``in_filename`` into ``out_filename``, chunk by chunk.

            Output layout: the original file size as a little-endian
            unsigned 64-bit integer, then the IV, then the ciphertext.
            The input is read ``chunksize`` bytes at a time.
            """
            iv = Random.new().read(AES.block_size)
            encryptor = AES.new(self.key, AES.MODE_CBC, iv)
            filesize = os.path.getsize(in_filename)
            with open(in_filename, 'rb') as infile:
                with open(out_filename, 'wb') as outfile:
                    outfile.write(struct.pack('<Q', filesize))
                    outfile.write(iv)
                    while True:
                        chunk = infile.read(chunksize)
                        if not chunk:
                            break
                        if len(chunk) % BS != 0:
                            # Fill the short final chunk up to a block
                            # boundary. The filler is discarded again by
                            # decrypt_file, which truncates to the stored
                            # size. A bytes literal keeps this valid for
                            # data read in binary mode.
                            chunk += b' ' * (BS - len(chunk) % BS)
                        outfile.write(encryptor.encrypt(chunk))

        def decrypt_file(self, in_filename, out_filename):
            """Decrypt a file written by ``encrypt_file`` into ``out_filename``.

            Reads the stored original size and the IV from the header,
            decrypts the remainder in ``chunksize``-byte pieces, and then
            truncates the output to the stored size to drop the filler of
            the last block.
            """
            with open(in_filename, 'rb') as infile:
                # Use the same '<Q' format for sizing and unpacking the
                # header so both always agree on its length.
                header = infile.read(struct.calcsize('<Q'))
                origsize = struct.unpack('<Q', header)[0]
                iv = infile.read(16)
                decryptor = AES.new(self.key, AES.MODE_CBC, iv)
                with open(out_filename, 'wb') as outfile:
                    while True:
                        chunk = infile.read(chunksize)
                        if not chunk:
                            break
                        outfile.write(decryptor.decrypt(chunk))
                    outfile.truncate(origsize)
    
    
    # Define a key (a passphrase of arbitrary length)
    chave = "Oi! Eu sou uma chave de tamanho indefinido!"
    
    # Create an instance of the AES encryptor/decryptor object
    aes = AESCipher( chave )
    
    # Test encryption of data/text
    cifrado = aes.encrypt( "Eu sou uma mensagem super secreta." )
    decifrado = aes.decrypt( cifrado )
    
    # Call form of print: valid on Python 2 (single argument) and Python 3
    print(cifrado)
    print(decifrado)
    
    # Test file encryption
    aes.encrypt_file( "secreto.txt", "cifrado.bin" )
    
    # Test decryption of the file
    aes.decrypt_file( "cifrado.bin", "decifrado.txt" )
    

    Possible Output:

    $ python AESCipher.py
    zceFuiV9RTqFsBSY2AYcWMUXqYqI5+3yR08DsH/GeofcSFsg1KpjN4KKL+MaUq4Qmfa9uMFjXL4Ng41giNMGUQ==
    Eu sou uma mensagem super secreta.
    

    Checking the MD5 signature (hash) of the files:

    $ md5sum secreto.txt cifrado.bin decifrado.txt
    77daefe247686325a5da08e556aba4f0  secreto.txt
    e2645f7b2d0af5f79b5108707bb3a13d  cifrado.bin
    77daefe247686325a5da08e556aba4f0  decifrado.txt
    
        
    31.10.2017 / 10:00