The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 2.01 on page 36
Write a program to determine the ranges of char, short, int, and long variables, both signed and unsigned, by printing appropriate values from standard headers and by direct computation. Harder if you compute them: determine the ranges of the various floating-point types.
Solution by Rick Dearman (Category 0)
Has corrections by Russ Bobbitt marked /* RB */, by Stefan Farfeleder marked /* SF */, and by Ioannis A. Vranos marked /* IV */. Among the changes, the printf() messages were radically reworded, because Keith Thompson (kst-u@mib.org) pointed out that the original messages were not very accurate.
#include <stdio.h>
#include <limits.h>

/*
 * Report the width of a char and the limits of char, short, int and long
 * (plain, signed and unsigned where applicable) exactly as published by
 * <limits.h>.  Nothing is computed here; every number comes from a macro.
 *
 * The trailing markers credit the contributors of each line:
 * RB = Russ Bobbitt, SF = Stefan Farfeleder, IV = Ioannis A. Vranos.
 */
int main(void)
{
    /* char family */
    printf("\nBits of type char: %d\n\n", CHAR_BIT);                /* IV */

    printf("Maximum numeric value of type char: %d\n"               /* IV */
           "Minimum numeric value of type char: %d\n\n",            /* IV */
           CHAR_MAX, CHAR_MIN);

    printf("Maximum value of type signed char: %d\n"                /* IV */
           "Minimum value of type signed char: %d\n\n",             /* IV */
           SCHAR_MAX, SCHAR_MIN);

    /* UCHAR_MAX may have type int after promotion, so convert for %u. */
    printf("Maximum value of type unsigned char: %u\n\n",
           (unsigned) UCHAR_MAX);                                   /* SF */ /* IV */

    /* short family */
    printf("Maximum value of type short: %d\n"                      /* IV */
           "Minimum value of type short: %d\n\n",                   /* IV */
           SHRT_MAX, SHRT_MIN);

    printf("Maximum value of type unsigned short: %u\n\n",
           (unsigned) USHRT_MAX);                                   /* SF */ /* IV */

    /* int family */
    printf("Maximum value of type int: %d\n"                        /* IV */
           "Minimum value of type int: %d\n\n",                     /* IV */
           INT_MAX, INT_MIN);

    printf("Maximum value of type unsigned int: %u\n\n", UINT_MAX); /* RB */ /* IV */

    /* long family */
    printf("Maximum value of type long: %ld\n"                      /* RB */ /* IV */
           "Minimum value of type long: %ld\n\n",                   /* RB */ /* IV */
           LONG_MAX, LONG_MIN);

    printf("Maximum value of type unsigned long: %lu\n\n",
           ULONG_MAX);                                              /* RB */ /* IV */

    return 0;
}
Solution by Xggggg
considering long long integers:
#include <stdio.h>

float getFloat(char sign, unsigned char exp, unsigned mantissa);
double getDouble(char sign, unsigned short exp, unsigned long long mantissa);

/*
 * Compute the integer and floating-point ranges by bit manipulation only
 * (no <limits.h> / <float.h>).
 *
 * Integer maxima: an all-ones value of the unsigned type shifted right by one
 * bit.  Integer minima: -max - 1.  Both steps assume a two's complement
 * representation without padding bits.
 *
 * Each value is first stored in a variable of the matching signed type so
 * that the argument handed to printf really has the type its conversion
 * specifier expects.
 */
int main(void)
{
    int sc_max = (unsigned char)~0 >> 1;
    int ss_max = (unsigned short)~0 >> 1;
    int si_max = (int)(~0U >> 1);
    long sl_max = (long)(~0UL >> 1);
    long long sll_max = (long long)(~0ULL >> 1);

    printf("Signed char[%d to %d]\n", -sc_max - 1, sc_max);
    printf("Unsigned char[0 to %u]\n", (unsigned)(unsigned char)~0);
    printf("Signed short[%d to %d]\n", -ss_max - 1, ss_max);
    printf("Unsigned short[0 to %u]\n", (unsigned)(unsigned short)~0);
    printf("Signed int[%d to %d]\n", -si_max - 1, si_max);
    printf("Unsigned int[0 to %u]\n", ~0U);
    printf("Signed long[%ld to %ld]\n", -sl_max - 1, sl_max);
    printf("Unsigned long[0 to %lu]\n", ~0UL);
    printf("Signed long long[%lld to %lld]\n", -sll_max - 1, sll_max);
    printf("Unsigned long long[0 to %llu]\n", ~0ULL);

    /*
     * Largest finite magnitude: exponent field all ones minus one (~0 - 1),
     * mantissa all ones.  The lower bound of the range is the same magnitude
     * with the sign bit set; exponent 0 / mantissa 1 would only give the
     * negative value closest to zero.
     */
    printf("Float[%g to %g]\n", getFloat(1, ~0 - 1, ~0), getFloat(0, ~0 - 1, ~0));
    printf("Double[%g to %g]\n", getDouble(1, ~0 - 1, ~0), getDouble(0, ~0 - 1, ~0));
    return 0;
}

/*
 * Assemble a float from its raw fields: 1 sign bit, 8 exponent bits and the
 * low 23 bits of `mantissa`.
 *
 * Assumes `unsigned` and `float` are both 32 bits wide and that float uses
 * the IEEE 754 binary32 layout.  The bits are reinterpreted through a union,
 * which avoids the aliasing violation of casting an `unsigned *` to
 * `float *`.
 */
float getFloat(char sign, unsigned char exp, unsigned mantissa)
{
    union {
        unsigned bits;
        float value;
    } pun;

    pun.bits = (unsigned)(sign != 0) << 31
             | (unsigned)exp << 23
             | (mantissa & 0x7FFFFF);
    return pun.value;
}

/*
 * Assemble a double from its raw fields: 1 sign bit, the low 11 bits of
 * `exp` and the low 52 bits of `mantissa`.
 *
 * Assumes `unsigned long long` and `double` are both 64 bits wide and that
 * double uses the IEEE 754 binary64 layout.
 */
double getDouble(char sign, unsigned short exp, unsigned long long mantissa)
{
    union {
        unsigned long long bits;
        double value;
    } pun;

    pun.bits = (unsigned long long)(sign != 0) << 63
             | (unsigned long long)(exp & 0x7FF) << 52
             | (mantissa & 0xFFFFFFFFFFFFFULL);
    return pun.value;
}
Solution by Pilcrow
Integer solution. Floats to follow.
I know 'long long' is not in K&R. This is the 21st century.
Learned a bit doing this.
/* calculate the ranges of various types */
#include <stdio.h>
#include <limits.h>

/*
 * Print the ranges of the integer types twice: once computed (lowercase
 * labels) and once taken from <limits.h> (UPPERCASE labels).
 *
 * Computation: set every bit of the unsigned type and shift right once to get
 * the signed maximum; the signed minimum is -max - 1 and the unsigned maximum
 * is max * 2 + 1.  This assumes two's complement without padding bits.
 *
 * Each computed value is copied into a variable of the type its printf
 * conversion expects; passing an unsigned value to %d/%ld/%lld (or a signed
 * constant to %u/%lu/%llu) is a type mismatch.
 */
int main(void)
{
    /* integer types */
    unsigned char c;
    unsigned short s;
    unsigned int i;
    unsigned long l;
    unsigned long long ll;

    int c_max, s_max, i_max;
    long l_max;
    long long ll_max;

    putchar('\n');
    printf("VALUE RANGES FOR INTEGER TYPES\nUPPERCASE comes from <limits.h>\n"
           "lowercase is calculated.\n\n");

    /* char */
    c = ~0;
    c >>= 1;
    c_max = c;
    printf("signed char: %4d, %4d\n", -c_max - 1, c_max);
    printf("SCHAR_MIN: %4d, SCHAR_MAX %4d\n", SCHAR_MIN, SCHAR_MAX);
    printf("unsigned char: %4u, %4u\n", 0u, (unsigned)(c * 2 + 1));
    printf("UCHAR_MAX: %4u\n", (unsigned)UCHAR_MAX);
    putchar('\n');

    /* short */
    s = ~0;
    s >>= 1;
    s_max = s;
    printf("signed short: %6d, %6d\n", -s_max - 1, s_max);
    printf("SHRT_MIN: %6d, SHRT_MAX: %6d\n", SHRT_MIN, SHRT_MAX);
    printf("unsigned short: %6u, %6u\n", 0u, (unsigned)s * 2 + 1);
    printf("USHRT_MAX: %6u\n", (unsigned)USHRT_MAX);
    putchar('\n');

    /* int */
    i = ~0;
    i >>= 1;
    i_max = (int)i;             /* fits: the top bit was just cleared */
    printf("signed int: %11d, %11d\n", -i_max - 1, i_max);
    printf("INT_MIN: %11d, INT_MAX: %11d\n", INT_MIN, INT_MAX);
    printf("unsigned int: %11u, %11u\n", 0u, i * 2 + 1);
    printf("UINT_MAX: %11u\n", UINT_MAX);
    putchar('\n');

    /* long */
    l = ~0;
    l >>= 1;
    l_max = (long)l;
    printf("signed long: %11ld, %11ld\n", -l_max - 1, l_max);
    printf("LONG_MIN: %11ld, LONG_MAX: %11ld\n", LONG_MIN, LONG_MAX);
    printf("unsigned long: %11lu, %11lu\n", 0ul, l * 2 + 1);
    printf("ULONG_MAX: %11lu\n", ULONG_MAX);
    putchar('\n');

    /* long long */
    ll = ~0;
    ll >>= 1;
    ll_max = (long long)ll;
    printf("signed long long: %20lld, %20lld\n", -ll_max - 1, ll_max);
    printf("LLONG_MIN: %20lld, LLONG_MAX: %20lld\n", LLONG_MIN, LLONG_MAX);
    printf("unsigned long long: %20llu, %20llu\n", 0ull, ll * 2 + 1);
    printf("ULLONG_MAX: %20llu\n", ULLONG_MAX);

    return 0;
}
Solution by CakeOFTrust
This solution uses overflow.
#include <stdio.h>
#include <limits.h>
#include <float.h>

/*
 * Computes integer and floating-point limits by repeated doubling and
 * compares each result with the matching <limits.h> / <float.h> constant.
 * The program prints one 1/0 flag per comparison rather than the limits
 * themselves.
 *
 * NOTE(review): several loops below evaluate `2 * x` or `x - helper` on
 * signed int / signed long operands after the value has reached the type's
 * limit.  Signed overflow is undefined behaviour in C, so the termination of
 * those loops is not guaranteed by the language; they are marked where they
 * occur.
 */

unsigned long maxval(char c[]);
signed long minval(char);
long double pepsi(char);
long double fmaxval(char);
long double fminval(char);
long double powe(long double, int);

/* Seeds for fmaxval()/fminval(); filled in by main() from pepsi(). */
float lim_f;
double lim_d;
long double lim_l;

/*
 * Fills the three seeds, then prints five rows of 1/0 flags:
 *   row 1: maxval() against the eight integer *_MAX constants
 *   row 2: minval() against the four signed *_MIN constants
 *   row 3: fmaxval() against FLT_MAX, DBL_MAX, LDBL_MAX
 *   row 4: fminval() against the negated *_MAX constants
 *   row 5: a power of two against FLT_MIN, DBL_MIN, LDBL_MIN
 */
int main(void)
{
    extern float lim_f;
    extern double lim_d;
    extern long double lim_l;
    int i = 2, ex = 0;

    lim_f = pepsi('f');
    lim_d = pepsi('d');
    lim_l = pepsi('l');

    printf("\n%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", maxval("uc") == UCHAR_MAX, maxval("sc") == SCHAR_MAX, maxval("us") == USHRT_MAX, maxval("ss") == SHRT_MAX, maxval("ui") == UINT_MAX, maxval("si") == INT_MAX, maxval("ul") == ULONG_MAX, maxval("sl") == LONG_MAX);
    printf("\n%d\t%d\t%d\t%d\n", minval('c') == SCHAR_MIN, minval('s') == SHRT_MIN, minval('i') == INT_MIN, minval('l') == LONG_MIN);
    printf("\n%d\t%d\t%d\n", fmaxval('f') == FLT_MAX, fmaxval('d') == DBL_MAX, fmaxval('l') == LDBL_MAX);
    printf("\n%d\t%d\t%d\n", fminval('f') == -FLT_MAX, fminval('d') == -DBL_MAX, fminval('l') == -LDBL_MAX);

    /*
     * ex is never reset: each loop continues from where the previous one
     * stopped, advancing until 2^ex exceeds the next larger *_MAX.  The value
     * 2^(-ex + 2) is then compared with the corresponding *_MIN.
     */
    while (powe(i, ex) <= FLT_MAX) ++ex;
    printf("\n%d\t", powe(i, -ex + 2) == FLT_MIN);
    while (powe(i, ex) <= DBL_MAX) ++ex;
    printf("%d\t", powe(i, -ex + 2) == DBL_MIN);
    while (powe(i, ex) <= LDBL_MAX) ++ex;
    printf("%d\n", powe(i, -ex + 2) == LDBL_MIN);

    return 0;
}

/*
 * Returns the computed maximum of an integer type selected by a two-letter
 * code: c[0] is 'u' for unsigned (anything else selects signed) and c[1] is
 * 'c', 'i', 's' or 'l' for char, int, short or long.  An unknown c[1] prints
 * a message and returns 0.
 *
 * Each branch first doubles the value while the doubled result still compares
 * greater, then searches for the smallest `helper` for which
 * value + (value - helper) no longer compares below value.
 */
unsigned long maxval(char c[])
{
    signed char sc = 1, sche;
    signed int si = 1, sihe;
    signed short ss = 1, sshe;
    signed long sl = 1, slhe;
    unsigned char uc = 1, uche;
    unsigned int ui = 1, uihe;
    unsigned short us = 1, ushe;
    unsigned long ul = 1, ulhe, helper = 1; /* helper is unsigned long, so the sums below are evaluated as unsigned long */

    if (c[1] == 'c') {
        if (c[0] != 'u') {
            /* 2 * sc is computed as int; storing it into sche narrows it */
            while (sc < (sche = 2 * sc)) sc = 2 * sc;
            while ((sche = sc + (sc - helper)) < sc) ++helper;
            return sche;
        } else {
            while (uc < (uche = 2 * uc)) uc = 2 * uc;
            while ((uche = uc + (uc - helper)) < uc) ++helper;
            return uche;
        }
    } else if (c[1] == 'i') {
        if (c[0] != 'u') {
            /* NOTE(review): 2 * si is signed int arithmetic; the final iteration overflows (undefined behaviour) */
            while (si < 2 * si) si = 2 * si;
            while ((sihe = si + (si - helper)) < si) ++helper;
            return sihe;
        } else {
            while (ui < 2 * ui) ui = 2 * ui;
            while ((uihe = ui + (ui - helper)) < ui) ++helper;
            return uihe;
        }
    } else if (c[1] == 's') {
        if (c[0] != 'u') {
            while (ss < (sshe = 2 * ss)) ss = 2 * ss;
            while ((sshe = ss + (ss - helper)) < ss) ++helper;
            return sshe;
        } else {
            while (us < (ushe = 2 * us)) us = 2 * us;
            while ((ushe = us + (us - helper)) < us) ++helper;
            return ushe;
        }
    } else if (c[1] == 'l') {
        if (c[0] != 'u') {
            /* NOTE(review): 2 * sl is signed long arithmetic; the final iteration overflows (undefined behaviour) */
            while (sl < (slhe = 2 * sl)) sl = 2 * sl;
            while ((slhe = sl + (sl - helper)) < sl) ++helper;
            return slhe;
        } else {
            while (ul < (ulhe = 2 * ul)) ul = 2 * ul;
            while ((ulhe = ul + (ul - helper)) < ul) ++helper;
            return ulhe;
        }
    } else {
        printf("\nWrong integer type. Try again.\n");
        return 0;
    }
}

/*
 * Returns the computed minimum of a signed integer type selected by c:
 * 'c' char, 's' short, 'i' int, 'l' long.  Any other code prints a message
 * and returns 0.
 *
 * Each branch starts at -1 and doubles while the doubled value still compares
 * smaller.  If subtracting helper (initially 1) then yields something smaller
 * still, the inner while searches for a helper that stops
 * value + (value + helper) comparing greater than value; otherwise the
 * doubled value itself is returned.
 */
signed long minval(char c)
{
    signed char ch = -1, che;
    signed int i = -1, ihe;
    signed short s = -1, she;
    signed long l = -1, lhe, helper = 1;

    if (c == 'c') {
        while ((che = 2 * ch) < ch) ch = 2 * ch;
        if ((che = ch - helper) < ch)
            while ((che = ch + (ch + helper)) > ch) ++helper;
        else /* pairs with the `if` above, not with the inner while */
            che = ch;
        return che;
    } else if (c == 's') {
        while ((she = 2 * s) < s) s = 2 * s;
        if ((she = s - helper) < s)
            while ((she = s + (s + helper)) > s) ++helper;
        else
            she = s;
        return she;
    }
    /* Reached only for codes other than 'c' and 's': both branches above return. */
    if (c == 'i') {
        /* NOTE(review): 2 * i is signed int arithmetic; doubling the most negative value overflows (undefined behaviour) */
        while (2 * i < i) i = 2 * i;
        if ((ihe = i - helper) < i)
            while ((ihe = i + (i + helper)) > i) ++helper;
        else
            ihe = i;
        return ihe;
    } else if (c == 'l') {
        /* NOTE(review): 2 * l and l - helper are signed long arithmetic and overflow at the limit (undefined behaviour) */
        while ((lhe = 2 * l) < l) l = 2 * l;
        if ((lhe = l - helper) < l)
            while ((lhe = l + (l + helper)) > l) ++helper;
        else
            lhe = l;
        return lhe;
    } else {
        printf("\nWrong integer type. Try again.\n");
        return 0;
    }
}

/*
 * Iterates x = 1 + x / 2 in the type selected by c ('f' float, 'd' double,
 * 'l' long double), starting from 1, until the new x equals the previous one.
 * Returns the value seen one step before the final, repeated value.
 * Presumably that is the largest representable value below 2 in the chosen
 * type - TODO confirm; it depends on how intermediate results are rounded.
 * An unknown code prints a message and returns 0.
 */
long double pepsi(char c)
{
    float feps = 1;
    double deps = 1;
    long double leps = 1, pr = 0, dr = 0; /* dr: latest value, pr: the one before it */

    if (c == 'f')
        while ((feps = 1 + feps / 2) != dr) {
            pr = dr;
            dr = feps;
        }
    else if (c == 'd')
        while ((deps = 1 + deps / 2) != dr) {
            pr = dr;
            dr = deps;
        }
    else if (c == 'l')
        while ((leps = 1 + leps / 2) != dr) {
            pr = dr;
            dr = leps;
        }
    else
        printf("\nWrong floating-point type. Try again.\n");

    return pr;
}

/*
 * Starting from the seed stored by main() (lim_f, lim_d or lim_l), doubles
 * the value until doubling no longer changes it, and returns the last value
 * seen before that point.  c selects the type: 'f', 'd' or 'l'; any other
 * code prints a message and returns 0.
 */
long double fmaxval(char c)
{
    float f;
    double d;
    long double l, pr = 0;
    extern float lim_f;
    extern double lim_d;
    extern long double lim_l;

    if (c == 'f') {
        f = lim_f;
        while (f != 2 * f) {
            pr = f;
            f = f * 2;
        }
    } else if (c == 'd') {
        d = lim_d;
        while (d != 2.0 * d) {
            pr = d;
            d = d * 2.0;
        }
    } else if (c == 'l') {
        l = lim_l;
        while (l != 2 * l) {
            pr = l;
            l = l * 2;
        }
    } else
        printf("\nWrong floating-point type. Try again.\n");

    return pr;
}

/*
 * Same procedure as fmaxval(), but starting from the negated seed, so the
 * value returned is the last one seen before doubling stops changing the
 * negative value.
 */
long double fminval(char c)
{
    float f;
    double d;
    long double l, pr = 0;
    extern float lim_f;
    extern double lim_d;
    extern long double lim_l;

    if (c == 'f') {
        f = -lim_f;
        while (f * 2 != f) {
            pr = f;
            f = f * 2;
        }
    } else if (c == 'd') {
        d = -lim_d;
        while (d * 2 != d) {
            pr = d;
            d = d * 2;
        }
    } else if (c == 'l') {
        l = -lim_l;
        while (l * 2 != l) {
            pr = l;
            l = l * 2;
        }
    } else
        printf("\nWrong floating-point type. Try again.\n");

    return pr;
}

/*
 * Returns i raised to the integer power p by repeated multiplication (p > 0)
 * or repeated division (p < 0).  For i equal to 0 or 1 the argument is
 * returned unchanged whatever p is; for any other i with p == 0 the result
 * is 1.
 */
long double powe(long double i, int p)
{
    long double r;

    if (i != 0 && i != 1) {
        if (p > 0)
            for(r = i, i = 1; p > 0; --p) /* r keeps the base, i accumulates the result */
                i = i * r;
        else
            for(r = i, i = 1; p < 0; ++p)
                i = i / r;
    }
    return i;
}
Solution by i9383
If you have suggestions, please tell me.
#include <stdio.h>
#include <limits.h>
#include <float.h>

int powerint(int i);       /* 2^i as int, used to find the limits of int */
long powerlong(int i);     /* 2^i as long, used to find the limits of long */
float powerfloat(int i);   /* 2^i as float, i may be negative */
double powerdouble(int i); /* 2^i as double, i may be negative */

/*
 * Prints the limits of char, short, int and long obtained by computation,
 * then the limits published in <limits.h> and <float.h>, then the float and
 * double limits rebuilt from powers of two (IEEE 754 single/double layout
 * assumed for the hard-coded exponents).
 */
int main(void)
{
    char char1, char0;   /* after the loop: char1 is the max, char0 the min */
    short short1, short0; /* after the loop: short1 is the max, short0 the min */
    int int0, i;
    long long0;
    double d;
    float f;

    /*
     * Count downwards from 0.  When the counter is decremented past the
     * minimum, the narrowing conversion back to char wraps it to the maximum
     * (implementation-defined, not undefined), the loop condition fails and
     * char0 still holds the minimum.  With an unsigned plain char the loop
     * ends after one step with min 0 and max UCHAR_MAX.
     */
    char0 = 0;
    for (char1 = 0; char1 <= char0; --char1)
        char0 = char1;
    printf("computation char max %d\n", char1);
    printf("computation char min %d\n\n", char0);

    short0 = 0;
    for (short1 = 0; short1 <= short0; --short1)
        short0 = short1;
    printf("computation short max %d\n", short1);
    printf("computation short min %d\n\n", short0);

    /*
     * powerint(i) doubles in unsigned arithmetic, so it reaches 0 once i
     * equals the width of int.  The last non-zero result is the top bit
     * alone, which converts to the most negative int.  The maximum is
     * derived as -(min + 1) so that no signed overflow occurs.
     */
    int0 = 0;
    for (i = 0; powerint(i) != 0; ++i)
        int0 = powerint(i);
    printf("computation int max %d\n", -(int0 + 1));
    printf("computation int min %d\n\n", int0);

    for (i = 0; powerlong(i) != 0; ++i)
        ;
    long0 = powerlong(i - 1);
    printf("computation long max %ld\n", -(long0 + 1));
    printf("computation long min %ld\n\n", long0);

    printf("char max %d\n", CHAR_MAX);
    printf("char min %d\n", CHAR_MIN);
    printf("unsigned char max %d\n\n", UCHAR_MAX);
    printf("short max %d\n", SHRT_MAX);
    printf("short min %d\n", SHRT_MIN);
    printf("unsigned short max %u\n\n", (unsigned)USHRT_MAX);
    printf("int max %d\n", INT_MAX);
    printf("int min %d\n", INT_MIN);
    printf("unsigned int max %u\n\n", UINT_MAX);   /* was %ld: wrong type */
    printf("long max %ld\n", LONG_MAX);
    printf("long min %ld\n", LONG_MIN);
    /* Printed as an integer: going through double loses the low bits. */
    printf("unsigned long max %lu\n\n", ULONG_MAX);

    printf("float max %f\n", FLT_MAX);
    printf("float min %.126f\n", FLT_MIN);
    printf("float max exponent %d\n", FLT_MAX_EXP);
    printf("float min exponent %d\n", FLT_MIN_EXP);
    printf("float mantissa digit exponent %d\n", FLT_MANT_DIG);
    printf("float epsilon %.23f\n", FLT_EPSILON);
    printf("double max %f\n", DBL_MAX);
    printf("double min %.1022f\n", DBL_MIN);
    printf("double max exponent %d\n", DBL_MAX_EXP);
    printf("double min exponent %d\n", DBL_MIN_EXP);
    printf("double epsilon %.52f\n", DBL_EPSILON);
    /* DBL_MANT_DIG is an int; %f here was a type mismatch. */
    printf("double mantissa digit exponent %d\n\n", DBL_MANT_DIG);

    /*
     * Largest float: the largest exponent (2^127) times the largest
     * significand, 2 - 2^-23 (all 23 fraction bits set).
     */
    printf("computation max float %f\n", f = powerfloat(127) * (2 - powerfloat(-23)));
    /* Smallest normalized float: significand 1, smallest exponent. */
    printf("computation min float %.126f\n", f = powerfloat(-126));
    printf("computation double max %f\n", d = powerdouble(1023) * (2 - DBL_EPSILON));
    printf("computation double min %.1022f\n", d = powerdouble(-1022));
    /* Subnormals: fraction bits only, scaled by the smallest exponent. */
    printf("computation denormalized number min %.149f\n", f = powerfloat(-126) * powerfloat(-23));
    printf("computation denormalized number max %.149f\n\n", f = powerfloat(-126) * (1 - powerfloat(-23)));

    return 0;
}

/*
 * Returns 2^i converted to int (1 for i <= 0).
 *
 * The doubling is done in unsigned arithmetic, where wraparound is defined:
 * the result is 0 once i reaches the width of unsigned.  Converting the top
 * bit alone to int is implementation-defined; on two's complement
 * implementations it yields the most negative int.
 */
int powerint(int i)
{
    unsigned p = 1;

    while (i > 0) {
        p = p * 2;
        --i;
    }
    return (int)p;
}

/* Same as powerint(), for long. */
long powerlong(int i)
{
    unsigned long p = 1;

    while (i > 0) {
        p = p * 2;
        --i;
    }
    return (long)p;
}

/* Returns 2^i as a float; negative i divides instead of multiplying. */
float powerfloat(int i)
{
    float p = 1;

    while (i > 0) {
        p = p * 2;
        --i;
    }
    while (i < 0) {
        p = p / 2;
        ++i;
    }
    return p;
}

/* Returns 2^i as a double; negative i divides instead of multiplying. */
double powerdouble(int i)
{
    double p = 1;

    while (i > 0) {
        p = p * 2;
        --i;
    }
    while (i < 0) {
        p = p / 2;
        ++i;
    }
    return p;
}
Solution by anonymous
This uses underflow to calculate the max value of all unsigned integer types. It then uses this to calculate the max/min values of signed integer types
This also calculates floats, doubles, and long doubles based off of the IEEE 754 standard.
Since Windows isn't fully compatible with the C standard, some code was written to work with Windows and some was written for the standard
Comments are a bit overkill and variable names could be better...
#include <stdio.h>
#include <math.h> /* for pow and powl functions */

/*
 * Exercise 2-1. Write a program to determine the ranges of char, short, int,
 * and long variables, both signed and unsigned, by printing appropriate
 * values from standard headers and by direct computation. Harder if you
 * compute them: determine the ranges of the various floating-point types.
 *
 * printf conversions used:
 *   %d    signed decimal integer        %u   unsigned decimal integer
 *   %e    floating point, exponent form
 * length modifiers used:
 *   h     short                         l    long
 *   ll    long long (standard C)        I64  64-bit integer (Windows runtime only)
 *   L     long double
 *
 * With Microsoft's compiler long double has the same representation as
 * double, so the long double line is wrong there.  Source:
 * https://docs.microsoft.com/en-us/cpp/c-language/type-long-double?view=msvc-160
 */

/*
 * Print "<label><lo> - <hi>" for an unsigned long long range.  On Windows the
 * line is printed twice, first with the I64 modifier and then with the
 * standard ll modifier; elsewhere only the standard form is printed, because
 * I64 is not a valid length modifier outside the Windows runtime.
 */
static void print_ull_range(const char *label, unsigned long long lo, unsigned long long hi)
{
#ifdef _WIN32
    printf("%s%I64u - %I64u\n", label, lo, hi); /* Windows */
#endif
    printf("%s%llu - %llu\n", label, lo, hi);   /* C standard */
}

/* Signed counterpart of print_ull_range(). */
static void print_ll_range(const char *label, long long lo, long long hi)
{
#ifdef _WIN32
    printf("%s%I64d - %I64d\n", label, lo, hi); /* Windows */
#endif
    printf("%s%lld - %lld\n", label, lo, hi);   /* C standard */
}

/*
 * Integer ranges: decrementing an unsigned 0 wraps to the type's maximum.
 * The signed maximum is that value divided by two and the signed minimum is
 * -max - 1 (two's complement without padding bits assumed).  Every spelling
 * of a type ("short", "short int", "signed short", ...) gets its own output
 * line even though the spellings name the same type.
 */
int main(void)
{
    /* ---- char ---- */
    unsigned char uchar_min = 0, uchar_max = 0;
    --uchar_max; /* wrap around from the minimum to get the maximum */
    printf("unsigned char range:\t\t%hu - %hu\n", uchar_min, uchar_max);

    /* Whether plain char is signed is implementation-defined: test it. */
    char char_min, char_max;
    if ((char)-1 < 0) {
        char_max = uchar_max / 2;
        char_min = -char_max - 1;
    } else {
        char_min = 0;
        char_max = uchar_max;
    }
    printf("char range:\t\t\t%hd - %hd\n", char_min, char_max);

    signed char schar_max = uchar_max / 2;
    signed char schar_min = -schar_max - 1;
    printf("signed char range:\t\t%hd - %hd\n", schar_min, schar_max);

    /* ---- short ---- */
    unsigned short ushort_min = 0, ushort_max = 0;
    --ushort_max;
    printf("unsigned short range:\t\t%hu - %hu\n", ushort_min, ushort_max);
    printf("unsigned short int range:\t%hu - %hu\n", ushort_min, ushort_max);

    short short_max = ushort_max / 2;
    short short_min = -short_max - 1;
    printf("short range:\t\t\t%hd - %hd\n", short_min, short_max);
    printf("short int range:\t\t%hd - %hd\n", short_min, short_max);
    printf("signed short range:\t\t%hd - %hd\n", short_min, short_max);
    printf("signed short int range:\t\t%hd - %hd\n", short_min, short_max);

    /* ---- int ---- */
    unsigned uint_min = 0, uint_max = 0;
    --uint_max;
    printf("unsigned range:\t\t\t%u - %u\n", uint_min, uint_max);
    printf("unsigned int range:\t\t%u - %u\n", uint_min, uint_max);

    int int_max = uint_max / 2;
    int int_min = -int_max - 1;
    printf("signed range:\t\t\t%d - %d\n", int_min, int_max);
    printf("int range:\t\t\t%d - %d\n", int_min, int_max);
    printf("signed int range:\t\t%d - %d\n", int_min, int_max);

    /* ---- long ---- */
    unsigned long ulong_min = 0, ulong_max = 0;
    --ulong_max;
    printf("unsigned long range:\t\t%lu - %lu\n", ulong_min, ulong_max);
    printf("unsigned long int range:\t%lu - %lu\n", ulong_min, ulong_max);

    long long_max = ulong_max / 2;
    long long_min = -long_max - 1;
    printf("long range:\t\t\t%ld - %ld\n", long_min, long_max);
    printf("long int range:\t\t\t%ld - %ld\n", long_min, long_max);
    printf("signed long range:\t\t%ld - %ld\n", long_min, long_max);
    printf("signed long int range:\t\t%ld - %ld\n", long_min, long_max);

    /* ---- long long ---- */
    unsigned long long ullong_min = 0, ullong_max = 0;
    --ullong_max;
    print_ull_range("unsigned long long range:\t", ullong_min, ullong_max);
    print_ull_range("unsigned long long int range:\t", ullong_min, ullong_max);

    long long llong_max = ullong_max / 2;
    long long llong_min = -llong_max - 1;
    print_ll_range("long long range:\t\t", llong_min, llong_max);
    print_ll_range("long long int range:\t\t", llong_min, llong_max);
    print_ll_range("signed long long range:\t\t", llong_min, llong_max);
    print_ll_range("signed long long int range:\t", llong_min, llong_max);

    /*
     * Floating point (IEEE 754).
     *
     * A binary32 float is laid out as
     *     [sign: 1 bit] [exponent: 8 bits] [mantissa: 23 bits]
     * and a normalized value is
     *     (-1)^sign * (1 + fraction) * 2^(exponent - bias)
     * with bias = 2^(exponent bits - 1) - 1 = 127.  The fraction is the sum
     * of 2^-k for every set mantissa bit k (counting from 1 at the most
     * significant bit).  Example: mantissa 1101000... gives
     * 2^-1 + 2^-2 + 2^-4 = 0.5 + 0.25 + 0.0625 = 0.8125, i.e. a significand
     * of 1.8125.
     *
     * Exponent fields 00000000 and 11111111 are reserved, so:
     *   smallest positive normalized float
     *     0 00000001 00000000000000000000000 = 1 * 2^(1 - 127) = 2^-126
     *     ~ 1.1754943508e-38
     *   largest float
     *     0 11111110 11111111111111111111111
     *     = (1 + 2^-1 + ... + 2^-23) * 2^(254 - 127)
     *     = 1.99999988079071044921875 * 2^127 ~ 3.4028234664e+38
     */
    float smallest = pow(2, -126);
    float mantissa = 1.0;
    for (int i = 1; i <= 23; ++i)
        mantissa += pow(2, -i);
    float largest = mantissa * pow(2, 127);
    printf("float range:\t\t\t%e - %e\n", smallest, largest);

    /*
     * binary64 double: 11 exponent bits (bias 2^10 - 1 = 1023) and 52
     * mantissa bits.
     *   smallest positive normalized double = 2^(1 - 1023) = 2^-1022
     *     ~ 2.2250738585072014e-308
     *   largest double = (1 + 2^-1 + ... + 2^-52) * 2^(2046 - 1023)
     *     ~ 1.7976931348623157e+308
     */
    double smallest2 = pow(2, -1022);
    double mantissa2 = 1.0;
    for (int i = 1; i <= 52; ++i)
        mantissa2 += pow(2, -i);
    double largest2 = mantissa2 * pow(2, 1023);
    printf("double range:\t\t\t%e - %e\n", smallest2, largest2);

    /*
     * long double is commonly (but not always!) stored in the x86 extended
     * precision format: 1 sign bit, 15 exponent bits (bias 2^14 - 1 = 16383)
     * and a 64-bit significand.  Unlike float and double, the leading 1 is
     * stored explicitly as the top significand bit, which leaves 63 fraction
     * bits - hence the sum below stops at 2^-63.
     *   smallest positive normalized long double = 2^(1 - 16383) = 2^-16382
     *     ~ 3.3621031431120935e-4932
     *   largest long double = (1 + 2^-1 + ... + 2^-63) * 2^(32766 - 16383)
     *     ~ 1.1897314953572318e+4932
     * On implementations where long double has a different format (for
     * example the same as double) these results are wrong.
     */
    long double smallest3 = powl(2, -16382);
    long double mantissa3 = 1.0;
    for (int i = 1; i <= 63; ++i)
        mantissa3 += powl(2.0, -i);
    long double largest3 = mantissa3 * powl(2.0, 16383.0);
    printf("long double range:\t\t%Le - %Le\n", smallest3, largest3);

    return 0;
}