Ruby 1.9.3p327(2012-11-10revision37606)
ext/digest/digest.c
Go to the documentation of this file.
00001 /************************************************
00002 
00003   digest.c -
00004 
00005   $Author: drbrain $
00006   created at: Fri May 25 08:57:27 JST 2001
00007 
00008   Copyright (C) 1995-2001 Yukihiro Matsumoto
00009   Copyright (C) 2001-2006 Akinori MUSHA
00010 
00011   $RoughId: digest.c,v 1.16 2001/07/13 15:38:27 knu Exp $
00012   $Id: digest.c 32951 2011-08-12 17:26:00Z drbrain $
00013 
00014 ************************************************/
00015 
00016 #include "digest.h"
00017 
00018 static VALUE rb_mDigest;
00019 static VALUE rb_mDigest_Instance;
00020 static VALUE rb_cDigest_Class;
00021 static VALUE rb_cDigest_Base;
00022 
00023 static ID id_reset, id_update, id_finish, id_digest, id_hexdigest, id_digest_length;
00024 static ID id_metadata;
00025 
00026 RUBY_EXTERN void Init_digest_base(void);
00027 
00028 /*
00029  * Document-module: Digest
00030  *
00031  * This module provides a framework for message digest libraries.
00032  *
00033  * You may want to look at OpenSSL::Digest as it supports more
00034  * algorithms.
00035  *
00036  * A cryptographic hash function is a procedure that takes data and returns
00037  * a fixed-size bit string: the hash value, also known as the _digest_. Hash
00038  * functions are also called one-way functions: it is easy to compute a
00039  * digest from a message, but it is infeasible to generate a message from a digest.
00040  *
00041  * == Example
00042  *
00043  *   require 'digest'
00044  *
00045  *   # Compute a complete digest
00046  *   sha256 = Digest::SHA256.new
00047  *   digest = sha256.digest message
00048  *
00049  *   # Compute digest by chunks
00050  *   sha256 = Digest::SHA256.new
00051  *   sha256.update message1
00052  *   sha256 << message2 # << is an alias for update
00053  *
00054  *   digest = sha256.digest
00055  *
00056  * == Digest algorithms
00057  *
00058  * Different digest algorithms (or hash functions) are available:
00059  *
00060  * HMAC::
00061  *   See FIPS PUB 198 The Keyed-Hash Message Authentication Code (HMAC)
00062  * RIPEMD-160::
00063  *   (as Digest::RMD160) see
00064  *   http://homes.esat.kuleuven.be/~bosselae/ripemd160.html
00065  * SHA1::
00066  *   See FIPS 180 Secure Hash Standard
00067  * SHA2 family::
00068  *   See FIPS 180 Secure Hash Standard which defines the following algorithms:
00069  *   * SHA512
00070  *   * SHA384
00071  *   * SHA256
00072  *
00073  * The latest versions of the FIPS publications can be found here:
00074  * http://csrc.nist.gov/publications/PubsFIPS.html
00075  *
00076  * Additionally Digest::BubbleBabble encodes a digest as a sequence of
00077  * consonants and vowels which is more recognizable and comparable than a
00078  * hexadecimal digest.  See http://en.wikipedia.org/wiki/Bubblebabble
00079  */
00080 
00081 static VALUE
00082 hexencode_str_new(VALUE str_digest)
00083 {
00084     char *digest;
00085     size_t digest_len;
00086     size_t i;
00087     VALUE str;
00088     char *p;
00089     static const char hex[] = {
00090         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
00091         'a', 'b', 'c', 'd', 'e', 'f'
00092     };
00093 
00094     StringValue(str_digest);
00095     digest = RSTRING_PTR(str_digest);
00096     digest_len = RSTRING_LEN(str_digest);
00097 
00098     if (LONG_MAX / 2 < digest_len) {
00099         rb_raise(rb_eRuntimeError, "digest string too long");
00100     }
00101 
00102     str = rb_str_new(0, digest_len * 2);
00103 
00104     for (i = 0, p = RSTRING_PTR(str); i < digest_len; i++) {
00105         unsigned char byte = digest[i];
00106 
00107         p[i + i]     = hex[byte >> 4];
00108         p[i + i + 1] = hex[byte & 0x0f];
00109     }
00110 
00111     return str;
00112 }
00113 
00114 /*
00115  * call-seq:
00116  *     Digest.hexencode(string) -> hexencoded_string
00117  *
00118  * Generates a hex-encoded version of a given _string_.
00119  */
00120 static VALUE
00121 rb_digest_s_hexencode(VALUE klass, VALUE str)
00122 {
00123     return hexencode_str_new(str);
00124 }
00125 
00126 NORETURN(static void rb_digest_instance_method_unimpl(VALUE self, const char *method));
00127 
00128 /*
00129  * Document-module: Digest::Instance
00130  *
00131  * This module provides instance methods for a digest implementation
00132  * object to calculate message digest values.
00133  */
00134 
00135 static void
00136 rb_digest_instance_method_unimpl(VALUE self, const char *method)
00137 {
00138     rb_raise(rb_eRuntimeError, "%s does not implement %s()",
00139              rb_obj_classname(self), method);
00140 }
00141 
00142 /*
00143  * call-seq:
00144  *     digest_obj.update(string) -> digest_obj
00145  *     digest_obj << string -> digest_obj
00146  *
00147  * Updates the digest using a given _string_ and returns self.
00148  *
00149  * The update() method and the left-shift operator are overridden by
00150  * each implementation subclass. (One should be an alias for the
00151  * other)
00152  */
00153 static VALUE
00154 rb_digest_instance_update(VALUE self, VALUE str)
00155 {
00156     rb_digest_instance_method_unimpl(self, "update");
00157 }
00158 
00159 /*
00160  * call-seq:
00161  *     digest_obj.instance_eval { finish } -> digest_obj
00162  *
00163  * Finishes the digest and returns the resulting hash value.
00164  *
00165  * This method is overridden by each implementation subclass and often
00166  * made private, because some of those subclasses may leave internal
00167  * data uninitialized.  Do not call this method from outside.  Use
00168  * #digest!() instead, which ensures that internal data be reset for
00169  * security reasons.
00170  */
00171 static VALUE
00172 rb_digest_instance_finish(VALUE self)
00173 {
00174     rb_digest_instance_method_unimpl(self, "finish");
00175 }
00176 
00177 /*
00178  * call-seq:
00179  *     digest_obj.reset -> digest_obj
00180  *
00181  * Resets the digest to the initial state and returns self.
00182  *
00183  * This method is overridden by each implementation subclass.
00184  */
00185 static VALUE
00186 rb_digest_instance_reset(VALUE self)
00187 {
00188     rb_digest_instance_method_unimpl(self, "reset");
00189 }
00190 
00191 /*
00192  * call-seq:
00193  *     digest_obj.new -> another_digest_obj
00194  *
00195  * Returns a new, initialized copy of the digest object.  Equivalent
00196  * to digest_obj.clone().reset().
00197  */
00198 static VALUE
00199 rb_digest_instance_new(VALUE self)
00200 {
00201     VALUE clone = rb_obj_clone(self);
00202     rb_funcall(clone, id_reset, 0);
00203     return clone;
00204 }
00205 
00206 /*
00207  * call-seq:
00208  *     digest_obj.digest -> string
00209  *     digest_obj.digest(string) -> string
00210  *
00211  * If none is given, returns the resulting hash value of the digest,
00212  * keeping the digest's state.
00213  *
00214  * If a _string_ is given, returns the hash value for the given
00215  * _string_, resetting the digest to the initial state before and
00216  * after the process.
00217  */
00218 static VALUE
00219 rb_digest_instance_digest(int argc, VALUE *argv, VALUE self)
00220 {
00221     VALUE str, value;
00222 
00223     if (rb_scan_args(argc, argv, "01", &str) > 0) {
00224         rb_funcall(self, id_reset, 0);
00225         rb_funcall(self, id_update, 1, str);
00226         value = rb_funcall(self, id_finish, 0);
00227         rb_funcall(self, id_reset, 0);
00228     } else {
00229         value = rb_funcall(rb_obj_clone(self), id_finish, 0);
00230     }
00231 
00232     return value;
00233 }
00234 
00235 /*
00236  * call-seq:
00237  *     digest_obj.digest! -> string
00238  *
00239  * Returns the resulting hash value and resets the digest to the
00240  * initial state.
00241  */
00242 static VALUE
00243 rb_digest_instance_digest_bang(VALUE self)
00244 {
00245     VALUE value = rb_funcall(self, id_finish, 0);
00246     rb_funcall(self, id_reset, 0);
00247 
00248     return value;
00249 }
00250 
00251 /*
00252  * call-seq:
00253  *     digest_obj.hexdigest -> string
00254  *     digest_obj.hexdigest(string) -> string
00255  *
00256  * If none is given, returns the resulting hash value of the digest in
00257  * a hex-encoded form, keeping the digest's state.
00258  *
00259  * If a _string_ is given, returns the hash value for the given
00260  * _string_ in a hex-encoded form, resetting the digest to the initial
00261  * state before and after the process.
00262  */
00263 static VALUE
00264 rb_digest_instance_hexdigest(int argc, VALUE *argv, VALUE self)
00265 {
00266     VALUE str, value;
00267 
00268     if (rb_scan_args(argc, argv, "01", &str) > 0) {
00269         rb_funcall(self, id_reset, 0);
00270         rb_funcall(self, id_update, 1, str);
00271         value = rb_funcall(self, id_finish, 0);
00272         rb_funcall(self, id_reset, 0);
00273     } else {
00274         value = rb_funcall(rb_obj_clone(self), id_finish, 0);
00275     }
00276 
00277     return hexencode_str_new(value);
00278 }
00279 
00280 /*
00281  * call-seq:
00282  *     digest_obj.hexdigest! -> string
00283  *
00284  * Returns the resulting hash value in a hex-encoded form and resets
00285  * the digest to the initial state.
00286  */
00287 static VALUE
00288 rb_digest_instance_hexdigest_bang(VALUE self)
00289 {
00290     VALUE value = rb_funcall(self, id_finish, 0);
00291     rb_funcall(self, id_reset, 0);
00292 
00293     return hexencode_str_new(value);
00294 }
00295 
00296 /*
00297  * call-seq:
00298  *     digest_obj.to_s -> string
00299  *
00300  * Returns digest_obj.hexdigest().
00301  */
00302 static VALUE
00303 rb_digest_instance_to_s(VALUE self)
00304 {
00305     return rb_funcall(self, id_hexdigest, 0);
00306 }
00307 
00308 /*
00309  * call-seq:
00310  *     digest_obj.inspect -> string
00311  *
00312  * Creates a printable version of the digest object.
00313  */
00314 static VALUE
00315 rb_digest_instance_inspect(VALUE self)
00316 {
00317     VALUE str;
00318     size_t digest_len = 32;     /* about this size at least */
00319     const char *cname;
00320 
00321     cname = rb_obj_classname(self);
00322 
00323     /* #<Digest::ClassName: xxxxx...xxxx> */
00324     str = rb_str_buf_new(2 + strlen(cname) + 2 + digest_len * 2 + 1);
00325     rb_str_buf_cat2(str, "#<");
00326     rb_str_buf_cat2(str, cname);
00327     rb_str_buf_cat2(str, ": ");
00328     rb_str_buf_append(str, rb_digest_instance_hexdigest(0, 0, self));
00329     rb_str_buf_cat2(str, ">");
00330     return str;
00331 }
00332 
00333 /*
00334  * call-seq:
00335  *     digest_obj == another_digest_obj -> boolean
00336  *     digest_obj == string -> boolean
00337  *
00338  * If a string is given, checks whether it is equal to the hex-encoded
00339  * hash value of the digest object.  If another digest instance is
00340  * given, checks whether they have the same hash value.  Otherwise
00341  * returns false.
00342  */
00343 static VALUE
00344 rb_digest_instance_equal(VALUE self, VALUE other)
00345 {
00346     VALUE str1, str2;
00347 
00348     if (rb_obj_is_kind_of(other, rb_mDigest_Instance) == Qtrue) {
00349         str1 = rb_digest_instance_digest(0, 0, self);
00350         str2 = rb_digest_instance_digest(0, 0, other);
00351     } else {
00352         str1 = rb_digest_instance_to_s(self);
00353         str2 = other;
00354     }
00355 
00356     /* never blindly assume that subclass methods return strings */
00357     StringValue(str1);
00358     StringValue(str2);
00359 
00360     if (RSTRING_LEN(str1) == RSTRING_LEN(str2) &&
00361         rb_str_cmp(str1, str2) == 0) {
00362         return Qtrue;
00363     }
00364     return Qfalse;
00365 }
00366 
00367 /*
00368  * call-seq:
00369  *     digest_obj.digest_length -> integer
00370  *
00371  * Returns the length of the hash value of the digest.
00372  *
00373  * This method should be overridden by each implementation subclass.
00374  * If not, digest_obj.digest().length() is returned.
00375  */
00376 static VALUE
00377 rb_digest_instance_digest_length(VALUE self)
00378 {
00379     /* subclasses really should redefine this method */
00380     VALUE digest = rb_digest_instance_digest(0, 0, self);
00381 
00382     /* never blindly assume that #digest() returns a string */
00383     StringValue(digest);
00384     return INT2NUM(RSTRING_LEN(digest));
00385 }
00386 
00387 /*
00388  * call-seq:
00389  *     digest_obj.length -> integer
00390  *     digest_obj.size -> integer
00391  *
00392  * Returns digest_obj.digest_length().
00393  */
00394 static VALUE
00395 rb_digest_instance_length(VALUE self)
00396 {
00397     return rb_funcall(self, id_digest_length, 0);
00398 }
00399 
00400 /*
00401  * call-seq:
00402  *     digest_obj.block_length -> integer
00403  *
00404  * Returns the block length of the digest.
00405  *
00406  * This method is overridden by each implementation subclass.
00407  */
00408 static VALUE
00409 rb_digest_instance_block_length(VALUE self)
00410 {
00411     rb_digest_instance_method_unimpl(self, "block_length");
00412 }
00413 
00414 /*
00415  * Document-class: Digest::Class
00416  *
00417  * This class serves as a base class for digest implementation
00418  * classes.
00419  */
00420 
00421 /*
00422  * call-seq:
00423  *     Digest::Class.digest(string, *parameters) -> hash_string
00424  *
00425  * Returns the hash value of a given _string_.  This is equivalent to
00426  * Digest::Class.new(*parameters).digest(string), where extra
00427  * _parameters_, if any, are passed through to the constructor and the
00428  * _string_ is passed to #digest().
00429  */
00430 static VALUE
00431 rb_digest_class_s_digest(int argc, VALUE *argv, VALUE klass)
00432 {
00433     VALUE str;
00434     volatile VALUE obj;
00435 
00436     if (argc < 1) {
00437         rb_raise(rb_eArgError, "no data given");
00438     }
00439 
00440     str = *argv++;
00441     argc--;
00442 
00443     StringValue(str);
00444 
00445     obj = rb_obj_alloc(klass);
00446     rb_obj_call_init(obj, argc, argv);
00447 
00448     return rb_funcall(obj, id_digest, 1, str);
00449 }
00450 
00451 /*
00452  * call-seq:
00453  *     Digest::Class.hexdigest(string[, ...]) -> hash_string
00454  *
00455  * Returns the hex-encoded hash value of a given _string_.  This is
00456  * almost equivalent to
00457  * Digest.hexencode(Digest::Class.new(*parameters).digest(string)).
00458  */
00459 static VALUE
00460 rb_digest_class_s_hexdigest(int argc, VALUE *argv, VALUE klass)
00461 {
00462     return hexencode_str_new(rb_funcall2(klass, id_digest, argc, argv));
00463 }
00464 
00465 /* :nodoc: */
00466 static VALUE
00467 rb_digest_class_init(VALUE self)
00468 {
00469     return self;
00470 }
00471 
00472 /*
00473  * Document-class: Digest::Base
00474  *
00475  * This abstract class provides a common interface to message digest
00476  * implementation classes written in C.
00477  */
00478 
00479 static rb_digest_metadata_t *
00480 get_digest_base_metadata(VALUE klass)
00481 {
00482     VALUE p;
00483     VALUE obj;
00484     rb_digest_metadata_t *algo;
00485 
00486     for (p = klass; !NIL_P(p); p = rb_class_superclass(p)) {
00487         if (rb_ivar_defined(p, id_metadata)) {
00488             obj = rb_ivar_get(p, id_metadata);
00489             break;
00490         }
00491     }
00492 
00493     if (NIL_P(p))
00494         rb_raise(rb_eRuntimeError, "Digest::Base cannot be directly inherited in Ruby");
00495 
00496     Data_Get_Struct(obj, rb_digest_metadata_t, algo);
00497 
00498     switch (algo->api_version) {
00499       case 2:
00500         break;
00501 
00502       /*
00503        * put conversion here if possible when API is updated
00504        */
00505 
00506       default:
00507         rb_raise(rb_eRuntimeError, "Incompatible digest API version");
00508     }
00509 
00510     return algo;
00511 }
00512 
00513 static VALUE
00514 rb_digest_base_alloc(VALUE klass)
00515 {
00516     rb_digest_metadata_t *algo;
00517     VALUE obj;
00518     void *pctx;
00519 
00520     if (klass == rb_cDigest_Base) {
00521         rb_raise(rb_eNotImpError, "Digest::Base is an abstract class");
00522     }
00523 
00524     algo = get_digest_base_metadata(klass);
00525 
00526     pctx = xmalloc(algo->ctx_size);
00527     algo->init_func(pctx);
00528 
00529     obj = Data_Wrap_Struct(klass, 0, xfree, pctx);
00530 
00531     return obj;
00532 }
00533 
00534 /* :nodoc: */
00535 static VALUE
00536 rb_digest_base_copy(VALUE copy, VALUE obj)
00537 {
00538     rb_digest_metadata_t *algo;
00539     void *pctx1, *pctx2;
00540 
00541     if (copy == obj) return copy;
00542 
00543     rb_check_frozen(copy);
00544 
00545     algo = get_digest_base_metadata(rb_obj_class(copy));
00546 
00547     Data_Get_Struct(obj, void, pctx1);
00548     Data_Get_Struct(copy, void, pctx2);
00549     memcpy(pctx2, pctx1, algo->ctx_size);
00550 
00551     return copy;
00552 }
00553 
00554 /* :nodoc: */
00555 static VALUE
00556 rb_digest_base_reset(VALUE self)
00557 {
00558     rb_digest_metadata_t *algo;
00559     void *pctx;
00560 
00561     algo = get_digest_base_metadata(rb_obj_class(self));
00562 
00563     Data_Get_Struct(self, void, pctx);
00564 
00565     algo->init_func(pctx);
00566 
00567     return self;
00568 }
00569 
00570 /* :nodoc: */
00571 static VALUE
00572 rb_digest_base_update(VALUE self, VALUE str)
00573 {
00574     rb_digest_metadata_t *algo;
00575     void *pctx;
00576 
00577     algo = get_digest_base_metadata(rb_obj_class(self));
00578 
00579     Data_Get_Struct(self, void, pctx);
00580 
00581     StringValue(str);
00582     algo->update_func(pctx, (unsigned char *)RSTRING_PTR(str), RSTRING_LEN(str));
00583 
00584     return self;
00585 }
00586 
00587 /* :nodoc: */
00588 static VALUE
00589 rb_digest_base_finish(VALUE self)
00590 {
00591     rb_digest_metadata_t *algo;
00592     void *pctx;
00593     VALUE str;
00594 
00595     algo = get_digest_base_metadata(rb_obj_class(self));
00596 
00597     Data_Get_Struct(self, void, pctx);
00598 
00599     str = rb_str_new(0, algo->digest_len);
00600     algo->finish_func(pctx, (unsigned char *)RSTRING_PTR(str));
00601 
00602     /* avoid potential coredump caused by use of a finished context */
00603     algo->init_func(pctx);
00604 
00605     return str;
00606 }
00607 
00608 /* :nodoc: */
00609 static VALUE
00610 rb_digest_base_digest_length(VALUE self)
00611 {
00612     rb_digest_metadata_t *algo;
00613 
00614     algo = get_digest_base_metadata(rb_obj_class(self));
00615 
00616     return INT2NUM(algo->digest_len);
00617 }
00618 
00619 /* :nodoc: */
00620 static VALUE
00621 rb_digest_base_block_length(VALUE self)
00622 {
00623     rb_digest_metadata_t *algo;
00624 
00625     algo = get_digest_base_metadata(rb_obj_class(self));
00626 
00627     return INT2NUM(algo->block_len);
00628 }
00629 
00630 void
00631 Init_digest(void)
00632 {
00633     id_reset           = rb_intern("reset");
00634     id_update          = rb_intern("update");
00635     id_finish          = rb_intern("finish");
00636     id_digest          = rb_intern("digest");
00637     id_hexdigest       = rb_intern("hexdigest");
00638     id_digest_length   = rb_intern("digest_length");
00639 
00640     /*
00641      * module Digest
00642      */
00643     rb_mDigest = rb_define_module("Digest");
00644 
00645     /* module functions */
00646     rb_define_module_function(rb_mDigest, "hexencode", rb_digest_s_hexencode, 1);
00647 
00648     /*
00649      * module Digest::Instance
00650      */
00651     rb_mDigest_Instance = rb_define_module_under(rb_mDigest, "Instance");
00652 
00653     /* instance methods that should be overridden */
00654     rb_define_method(rb_mDigest_Instance, "update", rb_digest_instance_update, 1);
00655     rb_define_method(rb_mDigest_Instance, "<<", rb_digest_instance_update, 1);
00656     rb_define_private_method(rb_mDigest_Instance, "finish", rb_digest_instance_finish, 0);
00657     rb_define_method(rb_mDigest_Instance, "reset", rb_digest_instance_reset, 0);
00658     rb_define_method(rb_mDigest_Instance, "digest_length", rb_digest_instance_digest_length, 0);
00659     rb_define_method(rb_mDigest_Instance, "block_length", rb_digest_instance_block_length, 0);
00660 
00661     /* instance methods that may be overridden */
00662     rb_define_method(rb_mDigest_Instance, "==", rb_digest_instance_equal, 1);
00663     rb_define_method(rb_mDigest_Instance, "inspect", rb_digest_instance_inspect, 0);
00664 
00665     /* instance methods that need not usually be overridden */
00666     rb_define_method(rb_mDigest_Instance, "new", rb_digest_instance_new, 0);
00667     rb_define_method(rb_mDigest_Instance, "digest", rb_digest_instance_digest, -1);
00668     rb_define_method(rb_mDigest_Instance, "digest!", rb_digest_instance_digest_bang, 0);
00669     rb_define_method(rb_mDigest_Instance, "hexdigest", rb_digest_instance_hexdigest, -1);
00670     rb_define_method(rb_mDigest_Instance, "hexdigest!", rb_digest_instance_hexdigest_bang, 0);
00671     rb_define_method(rb_mDigest_Instance, "to_s", rb_digest_instance_to_s, 0);
00672     rb_define_method(rb_mDigest_Instance, "length", rb_digest_instance_length, 0);
00673     rb_define_method(rb_mDigest_Instance, "size", rb_digest_instance_length, 0);
00674 
00675     /*
00676      * class Digest::Class
00677      */
00678     rb_cDigest_Class = rb_define_class_under(rb_mDigest, "Class", rb_cObject);
00679     rb_define_method(rb_cDigest_Class, "initialize",  rb_digest_class_init, 0);
00680     rb_include_module(rb_cDigest_Class, rb_mDigest_Instance);
00681 
00682     /* class methods */
00683     rb_define_singleton_method(rb_cDigest_Class, "digest", rb_digest_class_s_digest, -1);
00684     rb_define_singleton_method(rb_cDigest_Class, "hexdigest", rb_digest_class_s_hexdigest, -1);
00685 
00686     id_metadata = rb_intern("metadata");
00687 
00688     /* class Digest::Base < Digest::Class */
00689     rb_cDigest_Base = rb_define_class_under(rb_mDigest, "Base", rb_cDigest_Class);
00690 
00691     rb_define_alloc_func(rb_cDigest_Base, rb_digest_base_alloc);
00692 
00693     rb_define_method(rb_cDigest_Base, "initialize_copy",  rb_digest_base_copy, 1);
00694     rb_define_method(rb_cDigest_Base, "reset", rb_digest_base_reset, 0);
00695     rb_define_method(rb_cDigest_Base, "update", rb_digest_base_update, 1);
00696     rb_define_method(rb_cDigest_Base, "<<", rb_digest_base_update, 1);
00697     rb_define_private_method(rb_cDigest_Base, "finish", rb_digest_base_finish, 0);
00698     rb_define_method(rb_cDigest_Base, "digest_length", rb_digest_base_digest_length, 0);
00699     rb_define_method(rb_cDigest_Base, "block_length", rb_digest_base_block_length, 0);
00700 }
00701