// gcc zipfilerecover.c --std=c99 -o zipfilerecover
// Tested to compile under Ubuntu; should compile under almost anything, if you have stdint.h. I've managed to get it to compile for Windows under Visual Studio 6.
// Zipfilerecover is released under a Creative Commons license: http://creativecommons.org/licenses/by-nc-nd/3.0/

// The utility is free for non-commercial use - should you want to recover a personal document, go ahead. This especially includes non-profit or public educational organisations, such as public schools. I made this for you, after a student at the school in which I work corrupted a Word document.
// Should you be a commercial organisation, then I don't see why I shouldn't draw a little profit from my efforts. A £2 one-off fee sounds fair, entitling you or your company to use ziprecover as many times as you want for all internal purposes. This licensing cost is only for successful use: If zipfilerecover can't recover the data you need, sorry about that, but no charge. You can try it right now, without payment, to see if it'll do what you need. I understand if you have a user  demanding you get back their report for the presentation this evening, so it's quite acceptable if you need to delay the payment a few days while more urgent things are done. Just paypal it to goldenpi@blueyonder.co.uk at your convenience, unless you can find me specifying a newer address somewhere.
// That's about as informal as a software license can get, and I imagine of very dubious legal binding, so I'm really relying on the goodwill of users to enforce it. Go ahead, and support the hobbyist who just saved your document.

//You should have two files: This C source file, and a precompiled windows executable zipfilerecover.exe. Use whichever is appropriate to your platform of choice.

// The CC license specifies no derivative works. This is for two reasons: One is that I don't want someone to take my code and release it as 'Office Recover Pro' for $29.99. The other is that .zip is an ugly, dated format, and this program contains even uglier code - and very little of actual innovation. It's nothing that any intermediate-level coder couldn't throw together in a few evenings with a copy of the zip specification, and I don't see why anyone would ever want to support zip these days if not for its use in office documents.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// In-memory copy of one ZIP local file header. The fixed-width members are
// listed in the order processfileentry() reads them from the archive; the two
// pointers refer to the variable-length data that follows the fixed fields.
// The struct carries a tag as well as a typedef name so that
// "struct zipsubfileheader" and "zipsubfileheader" are the same complete type.
typedef struct zipsubfileheader{
  uint16_t minversion;        // Minimum version needed to extract (reported, otherwise ignored).
  uint16_t bitflags;          // General purpose bit flags.
  uint16_t compressionmethod; // 0 = stored, 1 = shrink, 6 = implode, 8 = deflate.
  uint16_t modifiedtime;      // Last modification time, as stored in the archive.
  uint16_t modifieddate;      // Last modification date, as stored in the archive.
  uint32_t filecrc;           // CRC of the entry, copied through unchanged.
  uint32_t compressedsize;    // Number of payload bytes following the header.
  uint32_t uncompressedsize;  // Size of the entry once decompressed.
  uint16_t filenamelen;       // Length of filename in bytes (no terminator in the archive).
  uint16_t extradatalen;      // Length of the extra field in bytes.
  unsigned char *filename;    // Heap buffer holding the name, NUL-terminated by processfileentry().
  unsigned char *extradata;   // Extra field bytes; processfileentry() discards them and leaves this unset.
} zipsubfileheader;

// Forward declarations for the functions defined further down the file.
void printhelp();
int processfileentry(FILE *zipfile);

// Program-wide state shared between main() and processfileentry().
unsigned int nextzip = 0;   // Number used for the next entry's report line and output file name.
unsigned int dummyrun = 0;  // Non-zero when -d was given: analyse only, write nothing.
unsigned int writeraw = 0;  // Non-zero when -u was given: also write the raw payload to <n>.dat.

// Resized by processfileentry() as entries are processed; nothing in this file reads it.
struct zipsubfileheader *newdirectory = NULL;

int processfileentry(FILE *zipfile){
  uint32_t fileheader;
  fread(&fileheader, 1, 4, zipfile);
  if(feof(zipfile)){
    printf("\n\nEnd of file reached.\n\n");
    return(1);
  }
  printf("File number %u\n", nextzip);
  switch (fileheader){
    case 0x04034B50:
      printf("Header: %08X - correct magic bytes for a file within a ZIP.\n", fileheader);
      break;
    case 0x02014B50:
      printf("Header: %08X - ZIP file central directory. End of data section.\n", fileheader);
      return(1);
    default:
      printf("Header: %08X - incorrect magic bytes. File corrupt or not a zip?\n", fileheader);
      return(1);

  }
  zipsubfileheader newheader;;
  newdirectory=realloc(newdirectory, (nextzip+1)*sizeof(zipsubfileheader));

  fread(&newheader.minversion, 1, 2, zipfile);
  printf("Minimum version to extract: %u - ziprecover ignores this value.\n", newheader.minversion);
  fread(&newheader.bitflags, 1, 2, zipfile);
  printf("General purpose bit flags: 0x%04X - only bit 3 matters to us. If it's a one, ziprecover can't help you.\n", newheader.bitflags);
  fread(&newheader.compressionmethod, 1, 2, zipfile);
  printf("Compression method: %u", newheader.compressionmethod);
  switch(newheader.compressionmethod){
    case 8:
      printf(" (Deflate)\n");
      break;
    case 6:
      printf(" (Implode)\n");
      break;
    case 1:
      printf(" (Shrink)\n");
      break;
    case 0:
      printf(" (Stored, uncompressed)\n");
      break;
    default:
      printf("  (Unknown)\n");
  }
  fread(&newheader.modifiedtime, 1, 2, zipfile);
  fread(&newheader.modifieddate, 1, 2, zipfile);
  printf("Last modification time/date: %u/%u\n", newheader.modifiedtime, newheader.modifieddate);
  fread(&newheader.filecrc, 1, 4, zipfile);
  printf("File CRC: 0x%08X\n", newheader.filecrc);
  fread(&newheader.compressedsize, 1, 4, zipfile);
  fread(&newheader.uncompressedsize, 1, 4, zipfile);
  printf("File size uncompressed/compressed: %u/%u\n", newheader.uncompressedsize, newheader.compressedsize);  
  fread(&newheader.filenamelen, 1, 2, zipfile);
//  uint16_t extradatalen;
  fread(&newheader.extradatalen, 1, 2, zipfile);
  printf("Extra data len: %u\n", newheader.extradatalen);
  newheader.filename=malloc(newheader.filenamelen+1);
  memset(newheader.filename, 0, newheader.filenamelen+1);
  fread(newheader.filename, 1, newheader.filenamelen, zipfile);
  printf("File name (length): %s (%u)\n", newheader.filename, newheader.filenamelen);
  printf("Filename: \"%s\"\n", newheader.filename);
  printf("Extra data: ");
  unsigned int extradatalen=newheader.extradatalen;
  for(;extradatalen--;){ //Extra data is discarded. This does mean no recovering passworded files, but it also means no chance of this extra data being something that's causing extraction software to crash.
    printf("%02X",getc(zipfile));
  }
  printf("\n\n");

  //It's read. Now to write it out.
  if(dummyrun){
    for(;newheader.compressedsize--;){
      getc(zipfile);
    }
    return(0);
  }
  char *outfilename=(char *)malloc(32);
  char *rawoutfilename=(char *)malloc(32);
  sprintf(outfilename, "%u.zip", nextzip);
  sprintf(rawoutfilename, "%u.dat", nextzip++);
  FILE *outzip=fopen(outfilename, "wb");
  free(outfilename);
  fputc(0x50, outzip);fputc(0x4b, outzip);fputc(0x03, outzip);fputc(0x04, outzip); //Magic bytes for a file entry.
  fputc(0x00, outzip);fputc(0x00, outzip); //Version needed to extract
  fwrite(&newheader.bitflags, 1, 2, outzip);
  fwrite(&newheader.compressionmethod, 1, 2, outzip);
  fputc(0x00, outzip);fputc(0x00, outzip);fputc(0x00, outzip);fputc(0x00, outzip); //File modified time/date. Don't care about those.
  fwrite(&newheader.filecrc, 1, 4, outzip);
  fwrite(&newheader.compressedsize, 1, 4, outzip);
  fwrite(&newheader.uncompressedsize, 1, 4, outzip);
  fwrite(&newheader.filenamelen, 1, 2, outzip);
  fputc(0x00, outzip);fputc(0x00, outzip); //Extra data field length. Not going to copy that.
  fwrite(newheader.filename, 1, newheader.filenamelen, outzip);
  int bytesremaining=newheader.compressedsize;
  FILE *rawoutfile;
  if(writeraw)
    rawoutfile=fopen(rawoutfilename, "wb");
  for(;bytesremaining--;){
    unsigned char thisbyte=getc(zipfile);
    fputc(thisbyte, outzip);
    if(writeraw)
      fputc(thisbyte, rawoutfile);
  }
  if(writeraw)
    fclose(rawoutfile);
  free(rawoutfilename);
  //The central directory entry for this file:
  fputc(0x50, outzip);fputc(0x4b, outzip);fputc(0x01, outzip);fputc(0x02, outzip); //Magic bytes: Central directory.
  fputc(0x00, outzip);  fputc(0x00, outzip); //Version made by.
  fputc(0x00, outzip);  fputc(0x00, outzip); //Version to extract.
  fwrite(&newheader.bitflags, 1, 2, outzip);
  fwrite(&newheader.compressionmethod, 1, 2, outzip);
  fputc(0x00, outzip);fputc(0x00, outzip);fputc(0x00, outzip);fputc(0x00, outzip); //File modified time/date. Don't care about those.
  fwrite(&newheader.filecrc, 1, 4, outzip);
  fwrite(&newheader.compressedsize, 1, 4, outzip);
  fwrite(&newheader.uncompressedsize, 1, 4, outzip);
  fwrite(&newheader.filenamelen, 1, 2, outzip);
  fputc(0x00, outzip);fputc(0x00, outzip); //Extra field length
  fputc(0x00, outzip);fputc(0x00, outzip); //File comment length
  fputc(0x00, outzip);fputc(0x00, outzip); //Disk number start
  fputc(0x00, outzip);fputc(0x00, outzip); //Internal file attributes
  fputc(0x00, outzip);fputc(0x00, outzip);fputc(0x00, outzip);fputc(0x00, outzip); //External file attributes
  fputc(0x00, outzip);fputc(0x00, outzip);fputc(0x00, outzip);fputc(0x00, outzip); //Offset of local header.
  fwrite(newheader.filename, 1, newheader.filenamelen, outzip);
  //And the end-of-central-directory record:
  fputc(0x50, outzip);fputc(0x4b, outzip);fputc(0x05, outzip);fputc(0x06, outzip); //Magic bytes for EoCD record.

  fputc(0x00, outzip);fputc(0x00, outzip); //Disk number.
  fputc(0x00, outzip);fputc(0x00, outzip); //Number of disk with the start of the CD record.
  fputc(0x01, outzip);fputc(0x00, outzip); //Number of entries in the CD on this disk (One)
  fputc(0x01, outzip);fputc(0x00, outzip); //Number of entries in the CD total (One)
  uint32_t directoryoffset=30+newheader.filenamelen+newheader.compressedsize;
  uint32_t directorysize=46+newheader.filenamelen;
  fwrite(&directorysize, 1, 4, outzip);
  fwrite(&directoryoffset, 1, 4, outzip);
  fputc(0x00, outzip);fputc(0x00, outzip); //Zipfile comment length.
  fclose(outzip);
  return(0);
}

// Entry point. Parses the command line, opens the archive named on it and
// calls processfileentry() until that reports there is nothing more to read.
//
// Arguments:
//   -h   print the help text and exit
//   -d   dummy run: sets dummyrun so no output files are written
//   -u   sets writeraw so the raw payload is also written to <n>.dat
//   anything not starting with '-' is taken as the input file name; if several
//   are given the last one wins. Other arguments starting with '-' are ignored.
//
// Returns 0 on success or after printing help, 1 on a usage error or when the
// input file cannot be opened.
int main(int argc, char *argv[]){
  if(argc==1){
    printhelp();
    return(0);
  }
  const char *filename=NULL; //Points into argv, so no allocation is needed.
  for(int n=1; n<argc; n++){
    if(strcmp(argv[n], "-h") == 0){
      printhelp();
      return(0);
    }
    if(strcmp(argv[n], "-d") == 0){
      printf("Analyse only - not writing any output files.\n");
      dummyrun=1;
    }
    if(strcmp(argv[n], "-u") == 0){
      printf("Writing compressed data to seperate file.\n");
      writeraw=1;
    }
    if(argv[n][0] != '-')
      filename=argv[n];
  }
  if(writeraw && dummyrun){
    printf("You specified -d and -u. These options are mutually exclusive.\n");
    return(1);
  }
  //Only options were given: without this check an unset pointer would be
  //handed to printf and fopen.
  if(!filename){
    printf("No input file specified.\n\n");
    printhelp();
    return(1);
  }
  printf("\nOpening file %s\n", filename);
  FILE *zipfile=fopen(filename, "rb");
  if(!zipfile){
    printf("Could not open file.\n");
    return(1);
  }
  //processfileentry() returns non-zero once it cannot continue.
  while(!processfileentry(zipfile)){
  }
  fclose(zipfile);
  return(0);
}

// Print the program's version, purpose, usage and option summary to stdout.
// The text is one string, split across adjacent literals purely for
// readability. Output files are described as 0.zip, 1.zip, ... to match the
// "%u.zip" names that processfileentry() generates, counting from zero.
void printhelp(){
  //1.0 worked.
  //1.1 tidied up the code a little by introducing a structure to hold all file information rather than lots of unrelated variables.

  printf(
    "zipfilerecover 1.1\n"
    "  ziprecover will extract help data from a ZIP file - or a zip-based format such as a Microsoft Office 2007 document. It is designed to handle trunctuated or corrupted files, and is intended as a data recovery utility rather than a general-purpose extractor.\n"
    "  Ziprecover does not impliment any form of decompression, Instead, given a zip (or MS Office 2007 or later document, or other ZIP based format) it will break it apart into it's constituent files and write them to a series of new ZIP files named 0.zip, 1.zip, 2.zip and so on. The source zip's directory record is ignored, allowing trunctuated files to be read, and splitting each contained file into it's own ZIP confines any corruption that might otherwise cause difficulty in decompression.\n"
    "  To use:\n"
    "    ziprecover <corrupt.zip> [options]\n"
    "    Note that this will create a potentially very large number of new ZIP files (0.zip ... <n>.zip) in the current directory, which will overwrite any files of the same name already present.\n"
    "  Options:\n"
    "    -d    Dummy run, don't create output files.\n"
    "    -u   Write compressed data to <n>.dat, in addition to <n>.zip. These files may be of use for more advanced recovery attempts.\n"
    "\n"
    "It's up to you to sort through the mess of data output. If luck is on your side, you'll find what you need in one of the zip files. Text from Office documents is stored in XML, but you'll need some regex-fu if you want to get it into something you can read easily.\n"
    "\n");
}
