Friday 2 May 2014

Machine language

Problem 1. Hand-compile the following C fragments into Beta assembly language. You can assume that the necessary storage allocation for each variable or array has been done and that a UASM label has been defined that indicates the first storage location for that variable or array. All of the variables are stored in main memory (in the first 32k bytes of main memory so that they can be addressed by a 16-bit literal). You can also assume that all variables and arrays are C integers, i.e., 32-bit values.
  1. Explain what Beta assembly language instruction(s) are needed to load the value of a variable that has been allocated in the first 32k bytes of main memory (i.e., at an address less than 0x8000). How would your answer change if the variable was located at address outside this range (e.g., at address 0x12468).
    If the storage for the variable is located at an address less than 0x8000, the 16-bit constant field of the LD instruction can hold the complete address. Note that the 16-bit constant is sign-extended, so our address has to fit in 15 bits. So LD(R31,addr,R0) would load the contents Mem[addr] into R0 assuming addr < 0x8000. For addresses >= 0x8000 the 16-bit constant field isn't large enough to hold the address. In these cases one could use the LDR instruction to load a 32-bit address into a register and the use LD to fetch the data:
    vaddr:
       LONG(0x12468)
    ...
       LDR(vaddr,R0)    ; load address of variable into R0
       LD(R0,0,R0)      ; load Mem[address] into R0
    
  1. a = b + 3*c;

       LD(c,R1)
       SHLC(R1,1,R0)    ; 2*c
       ADD(R0,R1,R0)    ; + c
       LD(b,R1)
       ADD(R1,R0,R0)   
       ST(R0,a)
    
  1. if (a > b) c = 17;
       LD(a,R0)
       LD(b,R1)
       CMPLE(R0,R1,R0)
       BT(R0,_L2)
       CMOVE(17,R0)
       ST(R0,c)
    _L2:
    
  1. if (sxt_short) { b = (b << 16) >> 16; }
       LD(sxt_short,R0)
       BEQ(R0,_L3)
       LD(b,R1)
       SHLC(R1,16,R0)    ; shift so that bit 15 is now bit 31
       SRAC(R0,16,R0)    ; shift back, replicating sign bit
       ST(R0,b)
    
  1. cjt->salary += 3752;
    Assume that the salary component of the structure pointed to by cjt has a byte offset of 8 from the beginning of the structure.
       LD(cjt,R1)
       LD(R1,8,R0)
       ADDC(R0,3752,R0)
       ST(R0,8,R1)
    
  1. a[i] = a[i-1];
       LD(i,R0)
       SHLC(R0,2,R0)
       LD(R0,a-4,R1)
       ST(R1,a,R0)
    
  1. sum = 0;
    for (i = 0; i < 10; i = i+1) sum += i;
       ST(R31,sum)
       ST(R31,i)
    _L7:
       LD(sum,R0)
       LD(i,R1)
       ADD(R0,R1,R0)
       ST(R0,sum)
       ADDC(R1,1,R1)
       ST(R1,i)
       CMPLTC(R1,10,R0)
       BT(R0,_L7)
    


Problem 2. In block structured languages such as C or Java, the scope of a variable declared locally within a block extends only over that block, i.e., the value of the local variable cannot be accessed outside the block. Conceptually, storage is allocated for the variable when the block is entered and deallocated when the block is exited. In many cases, this means the compiler if free to use a register to hold the value of the local variable instead of a memory location.
Consider the following C fragment:
int sum = 0;
{ int i;
  for (i = 0; i < 10; i = i+1) sum += i;
}
  1. Hand-compile this loop into assembly language, using registers to hold the values of the local variables "i" and "sum".
       MOVE(R31,R2)      ; R2 holds sum
       ST(R2,sum)
       MOVE(R31,R1)      ; R1 holds i
    _L5:
       ADD(R2,R1,R2)
       ADDC(R1,1,R1)
       CMPLTC(R1,10,R0)
       BT(R0,_L5)
       ST(R2,sum)
    
  1. Define a memory access as any access to memory, i.e., instruction fetch, data read (LD), or data write (ST). Compare the number of total number of memory accesses generated by executing the optimized loop with the total number of memory access for the unoptimized loop (part G of the preceding problem).
    The unoptimized code has an 8 instruction loop that makes 4 data accesses; 10 loop iterations => 120 memory accesses. There are 4 additional memory accesses to initialize sum and i. Total = 124.The optimized code has a 4 instruction loop that makes 0 data accesses; 10 loop iterations => 40 memory accesses. There are 6 additional memory accesses to initializes sum and i, and to store sum at the end of the loop. Total = 46.
  1. Some optimizing compilers "unroll" small loops to amortize the overhead of each loop iteration over more instructions in the body of the loop. For example, one unrolling of the loop above would be equivalent to rewriting the program as
    int sum = 0;
    { int i;
      for (i = 0; i < 10; i = i+2) { sum += i; sum += i+1; }
    }
    
    Hand-compile this loop into Beta assembly language and compare the total number of memory accesses generated when it executes to the total number of memory accesses from part (1).
       MOVE(R31,R2)      ; R2 holds sum
       ST(R2,sum)
       MOVE(R31,R1)      ; R1 holds i
    _L5:
       ADD(R2,R1,R2)
       ADDC(R1,1,R0)
       ADD(R2,R0,R2)
       ADDC(R1,2,R1)
       CMPLTC(R1,10,R0)
       BT(R0,_L5)
       ST(R2,sum)
    
    This code has a 6 instruction loop that makes 0 data accesses; 5 loop iterations => 30 memory accesses. There are 6 additional memory accesses to initializes sum and i, and to store sum at the end of the loop. Total = 36.


Problem 3.
  1. Hand-assemble the following Beta assembly language program:
            I = 0x5678
            B = 0x1234
    
            LD(I,R0)
            SHLC(R0,2,R0)
            LD(R0,B,R1)
            MULC(R1,17,R1)
            ST(R1,B,R0)
            I = 0x5678
            B = 0x1234
    
            LD(R31,I,R0)        011000 00000 11111 0101 0110 0111 1000 = 0x601F5678
            SHLC(R0,2,R0)       111100 00000 00000 0000 0000 0000 0010 = 0xF0000002
            LD(R0,B,R1)         011000 00001 00000 0001 0010 0011 0100 = 0x60201234
            MULC(R1,17,R1)      110010 00001 00001 0000 0000 0001 0001 = 0xA8210011
            ST(R1,B,R0)         011001 00001 00000 0001 0010 0011 0100 = 0x64201234
    
  1. What C statement might have been compiled into the code fragment above?
    B[I] = B[I] * 17;


Problem 4. Hand-assemble the following Beta branch instructions into their binary representation:
  1. foo: BR(foo) [recall that BR(label) = BEQ(R31,label,R31)]
        BEQ    R31   R31   offset = -1
        011101 11111 11111 1111 1111 1111 1111
    
  1. BR(bar)
    bar:
        BEQ    R31   R31   offset = 0
        011101 11111 11111 0000 0000 0000 0000
    
  1. foo = 0x100
    . = 0x1000
    BF(R17,foo,R31)
        BEQ    R31   R17   offset = (0x100 - 0x1004)/4 = 0xFC3F
        011101 11111 10001 1111 1100 0011 1111
    
  1. Explain why PC-relative branch addressing is a good choice for computers like the Beta that can encode only a "small" constant in each instruction.
    Branches are used to implement conditional and looping constructs (e.g., ifwhilefor). So most branch targets are just a few instructions away. With PC-relative addressing, we can reach targets 32768 instructions before or 32767 instructions after the branch, independent of the actual absolute address of the branch. So used as an offset, the 16-bit constant can accommodate most branch targets even for very large programs. Used as an absolute address, branch targets would be constrained to be in the first 32K of memory.
  1. Suppose a different computer could encode an arbitrary 32-bit constant in an instruction (using, e.g., a variable-length instruction encoding). Would PC-relative addressing still make sense? Why?
    Even if a complete absolute address could be encoded in an instruction, PC-relative address might be much more compact since most branch targets are nearby, i.e., we could get by with a 16-bit offset instead of a 32-bit absolute address.


Problem 5.
  1. True or false: The Beta SUBC opcode could be eliminated since every SUBC instruction can be replaced an equivalent ADDC instruction.
    False: SUBC(Rx,0x8000,Rx) subtracts -32768 from Rx. The ADDC equivalent would add 32768 to Rx, but we can't express that constant in the signed, 16-bit constant field provided in the Beta instruction format.
  1. What is the binary representation for the Beta instruction SUBC(R17,12,R22)?
     SUBC   R22   R17          12
    110001 10110 10001 0000 0000 0000 1100 = 0xC6D1000C
    
  1. A certain TA wants to know what would happen if the Beta as implemented in the lab executed 0xEDEDEDED as an instruction. What does happen?
    0xEDEDEDED = 111011 01111 01101 1110 1101 1110 1101
    The opcode field correspsonds to an illegal instruction opcode which causes the beta to take a trap (saving the PC+4 of the offending instruction in the XP register) and set the PC to ILLOP.
  1. Suppose that the Beta instruction BR(error) were assembled into memory location 0x87654. Assuming that the instruction works as intended (i.e., when executed, control is transferred to the first instruction in the error routine), which of the following is the best statement about the possible values for the symbol "error"?
    1. it depends on the first instruction in the error routine.
    2. it can have any 32-bit value
    3. it can have any 32-bit value that is a multiple of 4
    4. it is a multiple of 4 in the range 0x7F658 to 0x8F654 inclusive.
    5. it is a multiple of 4 in the range 0x67658 to 0xA7654 inclusive.
    6. none of the above
    (E): A branch instruction in which the branch is taken will multiply the sign-extended 16-bit literal field by 4 and add it to PC+4.
    if literal = 0x8000 then
      new PC = 0x87654 + 4 - (8000 * 4) = 0x67658
    if literal = 0x7FFF then
      new PC = 0x87654 + 4 + (7FFF * 4) = 0xA7654
    


Problem 6. The Meta is a processor similar to the Beta, except that the data paths have been modified to accommodate the addition of a new Subtract One and Branch instruction:
  Usage: SOB(Ra,label,Rc)
  Operation:
    literal = ((OFFSET(label) - OFFSET(current inst))/4) - 1
    PC = PC + 4
    EA = PC + 4*SEXT(literal)
    Reg[Rc] = Reg[Ra] - 1
    if (Reg[Ra]- 1) != 0 then PC = EA
As with branches in the Beta, the binary encoding of the SOB instruction places the low-order 16 bits of the "literal" value in the low-order 16 bits of the instruction. The designers of the Meta implementation have used the Meta's ALU to perform the subtraction.
  1. Suppose R1 contains the value 1. How will executing SOB(R1,label,R31) change register R1 and the PC?
    R1 is unchanged since the destination register (Rc) of the example SOB instruction is R31. Reg[R1]-1 = 0, so the branch is not taken and so the PC will point to the instruction following the SOB.
  1. Consider the following instruction sequence:
    loop: ADD(R1,R2,R3)
          SOB(R4,loop,R4)
    
    Assuming the ADD instruction is placed in location 0x108 of memory, what are the contents of the low-order 16 bits of the SOB instruction?
    Actually we don't need to know the address of the ADD instruction to answer the question since the SOB instruction (like all Beta branches) uses PC-relative addressing. Remembering that the branch offset is computed from the PC of the instruction following the SOB, the correct contents of the offset field is -2 = 0xFFFE.
  1. A schematic for the adder circuitry in the ALU of the Meta is shown below:
    What would be the correct values for OP[2:0] in order to perform a subtract (i.e., SUM = A - B)?
    SUM = A - B = A + (~B + 1). Setting OP2 = 1 and OP1 = 0 selects ~B as the XB input to the 32-bit add, setting OP0 = 1 asserts the carry in for the low-order bit of the 32-bit add and hence provides the required "+1". So OP[2:0] = 0b101.
  1. What would be the correct values for OP[2:0] in order to perform the decrement needed for the SOB instruction (i.e., SUM = A - 1)?
    If OP[2:0] = 0b110, the XB input is set to (B or ~B) = all ones, the two's complement representation for -1. Carry-in should be set to 0.
  1. Is it possible to use the logic above to do an increment (i.e., SUM = A+1)?
    Yes, OP[2:0] = 0b001, setting XB to 0 and the carry-in to 1.


Problem 7. A local junk yard offers older CPUs with non-Beta architectures that require several clocks to execute each instruction. Here are the specifications:
ModelClock RateAvg. clocks/Inst.
x40 Mhz2.0
y100 Mhz10.0
z60 Mhz3.0
You are going to choose the machine which will execute your benchmark program the fastest, so you compiled and ran the benchmark on the three machines and counted the total instructions executed:
    x: 3,600,000 instructions executed
    y: 1,900,000 instructions executed
    z: 4,200,000 instructions executed
  1. Based on the above data which machine would you choose?
    Total execution time:

    x: (3,600,000 insts)(2 clocks/inst)(25 ns/inst) = 0.18 seconds
    y: (1,900,000 insts)(10 clocks/inst)(10 ns/inst) = 0.19 seconds
    z: (4,200,000 insts)(3 clocks/inst)(16.67 ns/inst) = 0.21 seconds
    X ran the benchmark the fastest.


Problem 8. Kerry DeWay is proposing to add a "Load Constant" instruction LDC(const,Rx) to the Beta instruction set. LDC loads the 32-bit constant const in register Rx. She can't convince the hardware team to implement LDC directly and consequently plans to define it as a macro. She is considering the following alternative implementations:
[1] .macro LDC(const,Rx) {
       LD(.+8,Rx)
       BR(.+8)
       LONG(const)
    }

[2] .macro LDC(const,Rx) {
       PUSH(R17)
       BR(.+8,R17)
       LONG(const)
       LD(R17,0,Rx)
       POP(R17)
    }

[3] .macro LDC(const,Rx) {
       ADDC(R31,const >> 16,Rx)
       SHLC(Rx,16,Rx)
       ADDC(Rx,const & 0xFFFF,Rx)
    }
Kerry tries each definition on a few test cases and convinces herself each works fine. The Quality Assurance team isn't so sure and complains that Kerry's LDC implementations don't all work for every choice of register (Rx), every choice of constant (const), and every choice of code location.
  1. Evaluate each approach and decide whether it works under all circumstances or if it fails, indicate that it misbehaves for certain choices of Rx, const or code location.
    [1] fails if the code is located so that the LD instruction is at, e.g., address 0x7FFC since we can't represent .+8 = 0x8004 in the 16-bit literal field of the LD instruction.[2] fails for LDC(const,R17) since the POP(R17) at the end of the macro restores the old value of R17, wiping out the constant we just loaded.[3] fails for any const which has bit 15 set (e.g., 0x8000) since the final ADDC will sign-extended its literal field, adding 0xFFFF to the high half of Rx.


Problem 9. Which of the following Beta instruction sequences might have resulted from compiling the following C statement?
int x[20], y;
y = x[1] + 4;
  1. LD (R31, x + 1, R0)
    ADDC (R0, 4, R0)
    ST (R0, y, R31)
    Not this one. If x[0] is stored at location x, x[1] is stored at location x + 4 since x[] is an integer array and each integer takes one word (4 bytes).
  1. CMOVE (4, R0)
    ADDC (R0, x + 4, R0)
    ST (R0, y, R31)
    Not this one. The second instructions adds the address of x[1] to R0, not the contents of x[1].
  1. LD (R31, x + 4, R0)
    ST (R0, y + 4, R31)
    Not this one. This stores x[1] in the location following the one word of storage allocated for y.
  1. CMOVE (4, R0)
    LD (R0, x, R1)
    ST (R1, y, R0)
    Not this one. This implements y[1] = x[1].
  1. LD (R31, x + 4, R0)
    ADDC (R0, 4, R0)
    ST (R0, y, R31)
    Yes!
  1. ADDC (R31, x + 1, R0)
    ADDC (R0, 4, R0)
    ST (R0, y, R31)
    Not this one. The ADDC instruction loads the address of x plus 1 into R0.


Problem 10. An unnamed associate of yours has broken into the computer (a Beta of course!) that 6.004 uses for course administration. He has managed to grab the contents of the memory locations he believes holds the Beta code responsible for checking access passwords and would like you to help discover how the password code works. The memory contents are shown in the table below:
Address Contents (in hexadecimal)
0x100 0xC05F0008
0x104 0xC03F0000
0x108 0xE060000F
0x10C 0xF0210004
0x110 0xA4230800
0x114 0xF4000004
0x118 0xC4420001
0x11C 0x77E20002
0x120 0x77FFFFF9
0x124 0xA4230800
0x128 0x605F0124
0x12C 0x90211000
  1. Reconstruct the Beta assembly code that corresponds to the binary instruction encoding shown above. If the code sequence contains branches, be sure to indicate the destination of each branch.
    Address Contents        Opcode  Rc      Ra      Rb      Assembly
    0x100 0xC05F0008 110000 00010  11111   ADDC(R31, 0x8, R2)
    0x104 0xC03F0000 110000 00001  11111  ADDC(R31, 0x0, R1)
    0x108 0xE060000F 111000 00011  00000  ANDC(R0, 0xF, R3)
    0x10C 0xF0210004 111100 00001  00001  SHLC(R1, 0x4, R1)
    0x110 0xA4230800 101001 00001 00011 00001 OR(R3, R1, R1)
    0x114 0xF4000004 111101 00000  00000  SHRC(R0, 0x4, R0)
    0x118 0xC4420001 110001 00010  00010  SUBC(R2, 0x1, R2)
    0x11C 0x77E20002 011101 11111  00010  BEQ(R2,0x128) *
    0x120 0x77FFFFF9 011101 11111  11111  BEQ(R31,0x108) ** 
    0x124 0xA4230800 101001 00001  00011  not an opcode
    0x128 0x605F0124 011000 00010  11111  LD(0x0124,R2)
    0x12C 0x90211000 100100 00001  00001 00010 CMPEQ(R1,R2,R1)
    
    * The literal in instruction 0x11c is 0x2, so the corresponding label in
      Beta assembly is
     PC + 4 + 4*literal = 0x11c + 4 + 4*2 = 0x128
    ** In instruction 0x120, SEXT(literal) = -7, so the corresponding label
       in Beta assembly is
     PC + 4 + 4*literal = 0x120 + 4 + 4*(-7) = 0x124 - 0x01C = 0x108
    
  1. Further investigation reveals that the password is just a 32-bit integer which is in R0 when the code above is executed and that the system will grant access if R1 = 1 after the code has been executed. What "passnumber" will gain entry to the system?
    Let's analyze this assembly by translating it to pseudo-code:
     R2 = 8; /* R2 is used as a counter */
     R1 = 0;
    
    loop: R3 = R0 & 0xF; /* R3 stores the current low nibble of R0 */
     R1 = R1 << 4;
     R1 = R3 | R1;
     R0 = R0 >> 4;
     R2 = R2 - 1;
     if R2 == 0 goto done;
     goto loop;
    
    data: 0xA4230800
    
    done: LD(data,R2);
     if (R1 == R2)
      R1=1;
     else
      R1=0;
    
    We can see that the code shifts R1 left by a nibble (4 bits) and ors it with the low nibble (R3) of the user's entered password (R0). It then shifts the user's password right by a nibble and loops back to the beginning. It does this a total of 8 times. The net effect is to reverse the order of the nibbles in R0 and to store this into R1. The result is then compared to 0xA4230800. Therefore, in order for the entered password to be accepted, it must be the nibble-reversed version of 0xA4230800.
    Thus, the "passnumber" required to enter is 0x0080324A

Correctness of Algorithms

What does it mean to produce a correct solution to a problem? We can usually specify precisely what a correct solution would entail. For example, if your GPS produces a correct solution to finding the best route to travel, it might be the route, out of all possible routes from where you are to your desired destination, that will get you there soonest or perhaps the route that has the shortest possible distance or the route that will get you there soonest but also avoids tolls. Of course, the information that your GPS uses to determine a route might not match reality. Unless your GPS can access real-time traffic information, it might assume that the time to traverse a road equals the road's distance divided by the road's speed limit. If the road is congested, however, the GPS might give you bad advice if you're looking for the fastest route. We can still say that the routing algorithm that the GPS runs is correct, however, even if the input to the algorithm is not; for the input given to the routing algorithm, the algorithm produces the fastest route. Now, for some problems, it might be difficult or even impossible to say whether an algorithm produces a correct solution.
Sometimes, however, we can accept that a computer algorithm might produce an incorrect answer, as long as we can control how often it does so. Encryption provides a good example. The commonly used RSA cryptosystem relies on determining whether large numbers-really large, as in hundreds of digits long-are prime. If you have ever written a computer program, you could probably write one that determines whether a number n is prime. It would test all candidate divisors from 2 through n - 1, and if any of these candidates is indeed a divisor of n, then n is composite. If no number between 2 and n - 1 is a divisor of n, then n is prime. But if n is hundreds of digits long, that's a lot of candidate divisors, more than even a really fast computer could check in any reasonable amount of time. Of course, you could make some optimizations, such as eliminating all even candidates once you find that 2 is not a divisor, or stopping once you get to sqrt(n) (since if d is greater than sqrt(n) and d is a divisor of n, then n/ d is less than sqrt(n) and is also a divisor of n; therefore, if n has a divisor, you will find it by the time you get to sqrt(n)). If n is hundreds of digits long, then although sqrt(n) has only about half as many digits as n does, it's still a really large number. The good news is that we know of an algorithm that tests quickly whether a number is prime. The bad news is that it can make errors. In particular, if it declares that n is composite, then n is definitely composite, but if it declares that n is prime, then there's a chance that n is actually composite. But the bad news is not all that bad: we can control the error rate to be really low, such as one error in every 250 times. That's rare enough-one error in about every million billion times-for most of us to be comfortable with using this method to determine whether a number is prime for RSA.
Correctness is a tricky issue with another class of algorithms, called approximation algorithms. Approximation algorithms apply to optimization problems, in which we want to find the best solution according to some quantitative measure. Finding the fastest route, as a GPS does, is one example, where the quantitative measure is travel time. For some problems, we have no algorithm that finds an optimal solution in any reasonable amount of time, but we know of an approximation algorithm that, in a reasonable amount of time, can find a solution that is almost optimal. By "almost optimal;' we typically mean that the quantitative measure of the solution found by the approximation algorithm is within some known factor of the optimal solution's quantitative measure. As long as we specify what the desired factor is, we can say that a correct solution from an approximation algorithm is any solution that is within that factor of the optimal solution.

Thursday 1 May 2014

Some Programs to work with

COUNTING BITS IN AN UNSIGNED INTEGER
Computing the number of 1-bits in an unsigned integer:
/* Counting the Bit Set for an Unsigned Integer */
/*
(A) Loop through all the bits
    Computing time is proportional to the number of bits
(B) Computing time is proportional to the number of bits set
*/

#include <iostream>

using namespace std;


/* bit count for unsigned integer */
/* checking if the first bit == 1 while shifting right
   (25) 
   11001 -> 1100 -> 110 -> 11 -> 1
*/

int bit_countA(unsigned int n){
 int count = 0;
 while(n) {
  count += n & 0x1u;
  n >>= 1;  
 }
 return count;
}


/* sets the right most bit set 1 to 0 */
/* Computing time is proportional to the number of bits set */

int bit_countB(unsigned int n) {
 int count = 0;
 while(n) {
  count ++;
  n &= n - 1;  
 }
 return count;
}

int main()
{
 /* 25 (11001) */
 unsigned int num = 25;

 cout << num <<": # of bits set (A) is " << bit_countA(num) << endl;
 cout << num <<": # of bits set (B) is " << bit_countB(num) << endl;

 return 0;
}


INT STRLEN(CONST CHAR *)
strlen() counts just the visible characters and not the null character.
int strlen1(const char *s) {
    int n=0;
    while (*s++) n++;
    return(n);
}

int strlen2(const char *s) {
    int n=0;
    while (s[n]) n++;
    return(n);
}
CHAR* STRCPY(CHAR *, CONST CHAR*)
char* strcpy1(char s1[], const char s2[]) {
 for(int i=0; i <=strlen(s2); i++) {
  s1[i] = s2[i];
 }
 return s1;
}

char* strcpy2(char *s1, const char *s2) {
 char *p = s1;
 while(*s2!='\0') {
  *s1 = *s2;
  s1++;
  s2++;
 }
        *s1 = '\0';
 return p;
}

char* strcpy3(char *s1, const char *s2) {
 char *p = s1;
 while(*s2) *s1++ = *s2++;
        *s1 = '\0';
 return p;
}

char* strcpy4(char *s1, const char *s2) {
 char *p = s1;
 while(*s1++ = *s2++);
 return p;
}


char* strcpy5(char *s1, const char *s2) {
 int i = 0, j = 0;
 while(s1[i++] = s2[j++]);
 return s1;
}




CHAR * STRDUP ( CHAR *S)
strdup() does dynamic memory allocation for the character array including the end character '\0' and returns the address of the heap memory:
char *strdup (const char *s) {
    char *p = malloc (strlen (s) + 1);   // allocate memory
    if (p != NULL)
        strcpy (p,s);                    // copy string
    return p;                            // return the memory
}
So, what it does is giving us another string identical to the string given by its argument, without requiring us to allocate memory. But we still need to free it, later.
Though strdup() has been widely used, it's not a standard C/C++ library. So, there is an ambiguity regarding which memory allocation function was used and which deallocation function should be used to deallocate the memory: free() or delete()?
So, if the portability of our code is an issue, we should avoid using it.


VOID * MEMCPY ( VOID * DESTINATION, CONST VOID * SOURCE, SIZE_T SZ );
Copy block of memory
Copies the values of sz bytes from the location pointed by source directly to the memory block pointed by destination.
The underlying types of the objects pointed by both the source and destination pointers are irrelevant for this function;
The result is a binary (bit pattern) copy of the data.
The function does not check for any terminating null character in source - it always copies exactly sz bytes.
The behavior of memcpy() is undefined if source and destination overlap.
#include <iostream>
using namespace std;

void *memcpy( void *destination, const void *source, size_t sz ) {
  char *dest = (char *)destination;
  char const *src = (char *)source;

  while (sz--)
    *dest++ = *src++;
  return destination;
}

int main()
{
 char src1[] = "Ludwig Van Beethoven";
 char *src2 = "Franz Liszt ";
 char dest[40];

 cout << "src 1: " << src1 << endl;
 memcpy(dest,src1, strlen(src1)+1);
 cout << "dest: " << dest<< endl;

 cout << "src 2: " << src2 << endl;
 memcpy(dest,src2, strlen(src2)+1);
 cout << "dest: " << dest<< endl;

 return 0;
}
Output is:
src 1: Ludwig Van Beethoven
dest: Ludwig Van Beethoven
src 2: Franz Liszt
dest: Franz Liszt



VOID *MEMMOVE(VOID *DEST, CONST VOID *SRC, SIZE_T SZ)
Memmove() copies n bytes between two memory areas; unlike memcpy(), the areas may overlap. Actually, it is a variant ofmemcpy() that allows the source and destination block to overlap; otherwise it is equivalent (but slightly slower).
#include <iostream>
using namespace std;

void *memcpy(void *dest, const void *src, size_t sz)
{
 char *tmp = (char *)dest;
 char *s = (char *)src;

 while(sz--) {
  *tmp++ = *s++;
 }
 return dest;
}

void *memmove(void *dest, const void *src, size_t sz)
{
 char *tmp, *s;

 if (dest <= src) {
  tmp = (char *) dest;
  s = (char *) src;
  while (sz--)
   *tmp++ = *s++;
 }
 else {
  tmp = (char *) dest + sz;
  s = (char *) src + sz;
  while (sz--)
   *--tmp = *--s;
 }

 return dest;
}

int main()
{
 char src[50] = "Einstein was a very nice person!";

 cout << src << endl;
 memmove(src + 20, src + 15, 17);
 cout << src << endl;

 return 0;
}
Output is:
Einstein was a very nice person!
Einstein was a very very nice person!



VOID * MEMSET ( VOID * DESTINATION, INT SOURCE, SIZE_T SZ );
Sets the first sz bytes of the block of memory pointed by destination to the specified source (interpreted as an unsigned char).
#include <stdio.h>
#include <string.h>

void *MyMemset(void* destination, int source, size_t sz)
{
 char *pt = (char *)destination;
 while(sz--) {
      *pt++ = (unsigned char)(source);
 }
 return destination;
}
 
int main()
{
 char str1[]  = "What the heck is memset?";
 char str2[]  = "What the heck is memset?";
 memset(str1 + 5, '*',8);
 puts(str1);
 MyMemset(str2 + 5, 'x',8);
 puts(str2);
 return 0;
}
Output is:
What ******** is memset?
What xxxxxxxx is memset?



INT MEMCMP (CONST VOID *S1, CONST VOID *S2, SIZE_T SZ )
Compare two blocks of memory.
Compares the first sz bytes of the block of memory pointed by s1 to the first sz bytes pointed by s2, returning zero if they all match or a value different from zero representing which is greater if they do not. Unlike strcmp(), the function does not stop comparing after finding a null character.
#include <iostream>
using namespace std;

int memcmp(const void *s1, const void *s2, size_t sz)
{
 if (sz!= 0) {
  const char *p1 = (const char *)s1;
  const char *p2 = (const char *)s2;
  do {
   if (*p1++ != *p2++)
    return (*--p1 - *--p2);
  } while (--sz!= 0);
 }
 return (0);
}

int main ()
{
  char s1[] = "abc123";
  char s2[] = "abc12A";

  int n = memcmp ( s1, s2, sizeof(s1) );

  if (n > 0) 
   cout << s1 << " is greater than " << s2 << endl;
  else if (n < 0) 
   cout << s1 << " is less than " << s2 << endl;
  else 
   cout << s1 << " is the same as " << s2 << endl;

  return 0;
}
Output is:
abc123 is less than abc12A



CONVERTING STRING TO INTEGER (ATOI())
example A
#include <sstream>
int str2intA(const string &str) {
       stringstream ss(str);
       int n;
       ss >> n;
       return n;
}

example B
int str2intB(const string &str){
 int n = 0;
 int len =str.length();
 for(int i = 0; i < len; i++) {
  n *= 10;
  n += str[i]-'0';
 }
 return n;
}



CONVERTING INTEGER TO STRING (ITOA())
example A
#include <sstream>
string int2strA(int n) {
 stringstream ss;
 ss << n; 
 return ss.str();
}

example B
string int2strB(int numb) {
 int i=0;
 int end=10;
 char* temp = new char[end];
 
 //store one digit at a time 
 for(i = 0; numb > 0; i++) {
  temp[i] = (numb % 10) + '0';
  numb /= 10;
 }

 // temp has the number in reverse order
 int len = i;
 int ii = 0;
 string s = new char[len];
 while(i > 0) s[ii++] = temp[--i];
 s.erase(len);
 delete temp;
 return s;
}

example C
#include <iostream>
#include <cstring>

using namespace std;

// reverse the string
void reverse(char *s)
{
 int sz = strlen(s);
 char c;
 int i, j;
 for(i = 0, j = sz -1; i < j; i++, j--) {
  c = s[i];
  s[i] = s[j];
  s[j] = c;
 }
}

// itoa
void int2strC(int n, char *s)
{
 int i = 0;
 while( n ) {
  s[i++] =  n % 10 + '0';
  n /= 10;
 }
 s[i] = '\0';
 cout << "before the reverse s = " << s << endl;
 reverse(s);
}

int main()
{
 char s[10];
 int n = 987654321;
 cout << "input n = " << n << endl;
 int2strC(n, s);
 cout << "output s = " << s << endl;
 return 0;
}
Output from the run:
input n = 987654321
before the reverse s = 123456789
output s = 987654321



ASCII STRING TO FLOAT - ATOF()
#include <iostream>
using namespace std;

double atofA(char s[]) 
{
 double d = 0, power = 1;
 int sign = 1;
 int i = 0;

 if(s[i] == '-' ) sign = -1;
 i++;
 while(s[i]) {
  if(s[i] == '.') break;
  d = d * 10 + (s[i] - '0');
  i++;
 }
 i++;
 power = 0.1;
 while(s[i]) {
  d += (s[i] - '0')*power;
  power /= 10.0;
  i++;
 }

 return d*sign;
}

int main()
{
 double d = atofA("-314.1592");
 cout.precision(7);
 cout  << d << endl;
 return 0;
}



REVERSING AN INTEGER: 345->543
#include <iostream>

using namespace std;

int reverse_int(int n)
{
 int m = 0;
 while(n >= 1) {
  m *= 10;
  m += n % 10;
  n = n / 10;
 }
 return m;
}

int main()
{
 int n;
 cin >> n;
 cout << "reverse of " << n << " is " << reverse_int(n) << endl;
 return 0;
}
Output:
345
reverse of 345 is 543



MULTIPLICATION WITHOUT MULTIPLICATION - RUSSIAN PEASANT MULTIPLICATION
238*13 without any multiplication operation.
Actually, the algorithm we're going to use is known as Russian Peasant Multiplication or
wiki-Ancient Egyptian multiplication.
// Russian Peasant Multiplication 

#include <iostream>
#include <iomanip>
using namespace std;

int RussianPeasant(int a, int b)
{
 int x = a,  y = b;
 int val = 0;
 cout << left << setw(5) << x << left << setw(5) << y << left << setw(5) << val << endl;
 while (x > 0) {
  if (x % 2 == 1) val = val + y;
  y = y << 1;  // double
  x = x >> 1;  // half
  cout << left << setw(5) << x << left << setw(5) << y << left << setw(5) << val << endl; 
 }
 return val;
}

int main() {
 RussianPeasant(238, 13);
 return 0;
}
The output should look like this:
238  13   0
119  26   0
59   52   26
29   104  78
14   208  182
7    416  182
3    832  598
1    1664 1430
0    3328 3094
Actually, the multiplication is known as Russian Peasant Multiplication. Whenever x is odd, the value of y is added to val until xequals to zero, and then it returns 3094.



REVERSING A STRING
Example A
#include <iostream>
#include <cstring>

void reverse_string(char s[]) {
 int i,j,c;
 for(i = 0, j = strlen(s)-1; i < j ; i++, j-- ) {
  c = s[i];
  s[i] = s[j];
  s[j] = c;
 }
}

int main() {
 char s[] ="reverse me";
 reverse_string(s);
 std::cout << s << std::endl;
 return 0;
}

Example B Printing a String Reversed 
Here we have two print functions, one for normal and the other one for printing a string in reverse.
#include <iostream>

using namespace std;

void normalPrint(char *s)
{
 if(*s) {
  putchar(*s);
  normalPrint(s+1);
 }
}
void reversePrint(char *s)
{
 if(*s) {
  reversePrint(s+1);
  putchar(*s);
 }
}

int main() 
{
 char *str = "Normal or Reverse";
 normalPrint(str);
 cout << endl;
 reversePrint(str);
 cout << endl;
 return 0;
}
Output is:
Normal or Reverse
esreveR ro lamroN


FINDING THE FIRST UN-REPEATING (NON-REPEATING) CHARACTER - METHOD A
This code is using map. 
  1. Put all characters into a map.
  2. If necessary (repeated), it will increment a counter which is the 2nd component of map, value.
  3. Then, traverse the string and search the map. If found and counter value is 1, return that character.
  4. Complexity: traversing (O(N)), searching balanced binary tree (O(log(N)) => O(N + log(N)) => O(N)
// Returning a character which is the first non-repeated in a string
#include <iostream>
#include <string>
#include <map>

using namespace std;
char find(string s)
{
 map<char,int> myMap;
 int len = s.length();
 for(int i = 0; i < len; i++) ++myMap[s[i]];
 for(int i = 0; i < len; i++) 
  if(myMap.find(s[i])->second == 1) return s[i];

 return 0;
}

int main()
{
 string s="abudabi";
 cout << find(s);
 return 0;
}



FINDING THE FIRST UN-REPEATING (NON-REPEATING) CHARACTER - METHOD B
This method is using simple array utilizing the inversion of character and integer.
Complexity: O(N) - It needs just two traversing.
// Returning a character which is the first non-repeated in a string
#include <iostream>
using namespace std;

char find(char* s)
{
 int myArr[256] = {0};
 int len = strlen(s);
 for(int i = 0; i < len; i++) 
  myArr[s[i]]++;

 for(int i = 0; i < len; i++)
  if(myArr[s[i]] == 1) return s[i];

 return 0;
}

int main()
{
 char* s="abudabi";
 cout << find(s);  // output 'u'
 return 0;
}



BINARY SEARCH A
#include <iostream>

using namespace std;

int bsearch(int [], int, int, int);

int main()
{
 const int arrSize = 100;
 int arr[arrSize];

 for (int i = 0; i <= arrSize -1; i++) {
  arr[i] = i;
 }

 int x = 35;
 int low = 0;
 int high = arrSize -1;
 int found = bsearch(arr, x, low, high);
 if(found == -1) {
  cout << "Could not find " << x << endl;
  return 0;
 }
 cout << "The value " << x << " is at " << found << endl;
 return 0;
}

int bsearch(int arr[], int x, int low, int high) {
 int mid;
 if(high < low) return -1;  // no match 
 while (low <= high) {
  mid = low + (high - low) / 2;
  if(x < arr[mid])
   high = mid - 1;
  else if (x > arr[mid])
   low = mid + 1;
  else
   return mid; // this is match 
 }
}


BINARY SEARCH B - RECURSIVE
int bsearch(int arr[], int x, int low, int high) {
 int mid;
 if(high < low) return -1;  // no match 
 mid = low + (high - low) / 2;
 if(x < arr[mid]) 
  bsearch(arr, x, low, mid - 1);
 else if (x > arr[mid]) 
  bsearch(arr, x, mid + 1, high);
 else
  return mid; // this is match 
}



BIG ENDIAN OR LITTLE ENDIAN (BYTE ORDER)?
If we want to represent a two-byte hex number, say c23g, we'll store it in two sequential bytes c2 followed by 3g. It seems that it's the right way of doing it. This number, stored with the big end first, is called big-endian. Unfortunately, however, a few computers, namely anything with an Intel or Intel-compatible processor, store the bytes reversed, so c23g would be stored in memory as the sequential bytes 3g followed by c2. This storage method is called little-endian.
Endian is important to know when reading or writing data structures, especially across networks so that different application can communicate with each other. Sometimes the endianness is hidden from the developer. Java uses a fixed endianness to store data regardless of the underlying platform's endianness, so data exchanges between two Java applications won't normally be affected by endianness. But other languages, C in particular don't specify an endianness for data storage, leaving the implementation free to choose the endianness that works best for the platform.
On big-endian (PowerPC, SPARC, and Internet), the 32-bit value x01234567 is stored as four bytes 0x01, 0x23, 0x45, 0x67, while on little-endian (Intel x86), it will be stored in reverse order:
Little_Big_Endians.png 




endian_diagram 

The picture below is a diagram for the example to check if a machine is little endian or big endian. The box shaded blue is the the byte we are checking because it's where a one-byte type (char) will be stored. The diagram shows where an integer (4-byte) value 1is stored.
We can distinguish between the LSB and the MSB because the value 1 as an integer, has the value of 1 for LSN, and the value of 0 for MSB.
Note that a little endian machine will place the 1 (0001x, LSB) into the lowest memory address location. However, a big endian machine will put 0 (0001x, MSB) into the lowest memory address location.

checking_little_big_endian 

Source A
#include <iostream>
using namespace std;

/* if the dereferenced pointer is 1, the machine is little-endian 
   otherwise the machine is big-endian */

int endian() {
 int one = 1;
 char *ptr;
 ptr = (char *)&one;
 return (*ptr);
}

int main()
{
 if(endian()) 
  cout << "little endian\n";
 else
  cout << "big endian\n";
}

Source B
#include <iostream>
using namespace std;

int endian() {
 union {
  int one;
  char ch;
 } endn;

 endn.one = 1;
 return endn.ch;
}

int main()
{
 if(endian()) 
  cout << "little endian\n";
 else
  cout << "big endian\n";
}
It is tempting to use bit operation for this problem. However, bit shift operator works on integer, not knowing the internal byte order, and that property prevents us from using the bit operators to determine byte order.


To check out the endian, we can just print out an integer (4 byte) to a 4-character output with the address of each character.
#include <stdio.h>

int main()
{
 int a = 12345; // x00003039
 char *ptr = (char*)(&a);
 for(int i = 0; i < sizeof(a); i++) {
  printf("%p\t0x%.2x\n", ptr+i, *(ptr+i));
 }
 return 0;
}
Output:
0088F8D8        0x39
0088F8D9        0x30 
0088F8DA        0x00
0088F8DB        0x00
As we see from the output, LSB (0x39) was written first at the lower address, which indicate my machine is little endian.


REMOVING COMMAS
Given a char array {1,234,34,54} Modify the char array so that there is no comma in the most efficient way. We must get a char array {12343454}
#include <iostream>
using namespace std;

void remove_comma(char arr[])
{
 char *b = arr, *c = arr;
 while(*b)
 {
  if(*b == ',') b++;
  else
   *c++ = *b++;
 }
 *c = '\0';
 return;

}

int main()
{
 char arr[] = "1,234,34,54";
 cout << arr << endl;

 remove_comma(arr);

 cout << arr << endl;

    return 0;
}
Output:
1,234,34,54
12343454


HASH TABLE BASED ON K & R
#include <iostream>
using namespace std;

#include 
using namespace std;

#define HASHSIZE 101

struct nlist {       /* table entry: */
       struct nlist *next;   /* next entry in chain */
       char *name;           /* defined name */
       char *defn;           /* replacement text */
};

static struct nlist *hashtab[HASHSIZE];

unsigned hash_function(char *s) {
       unsigned hashval;

       for (hashval = 0; *s != '\0'; s++)
           hashval = *s + 31 * hashval;
       return hashval % HASHSIZE;
}

struct nlist *lookup(char *s) {
       struct nlist *np;

       for (np = hashtab[hash_function(s)];  np != NULL; np = np->next)
           if (strcmp(s, np->name) == 0)
               return np;     /* found */
       return NULL;           /* not found */
}

/* install:  put (name, defn) in hashtab */
struct nlist *install(char *name, char *defn) {
       struct nlist *np;
       unsigned hashval;

       if ((np = lookup(name)) == NULL) { /* not found */
     np = new nlist;
           if (np == NULL || (np->name = _strdup(name)) == NULL)
               return NULL;
           hashval = hash_function(name);
           np->next = hashtab[hashval];
           hashtab[hashval] = np;
       } else       /* already there */
           /*free previous defn */
     delete np->defn;
       if ((np->defn = _strdup(defn)) == NULL)
           return NULL;
       return np;
}

/* uninstall:  take (name, defn) out of hashtab */
int undef(char * name) {
 struct nlist * np1;
 /* name not found */
 if ((np1 = lookup(name)) == NULL)  
  return 1;

 /* name found */
 if((np1 = hashtab[hash_function(name)]) != NULL ) {
  hashtab[hash_function(name)] = np1->next;
  delete np1;
  return 0;
 }
 /* name not found */
 return 1;  
}

void print() {

 struct nlist *np;
 for(int i = 0; i < HASHSIZE; i++) {
  np = hashtab[i];
  if(np) 
   cout << np->name << "-->" << np->defn << " ";
 }
 cout << endl;
}

int main()
{
 install("A","65");
 install("B","66");
 install("C","67");
 install("D","68");
 install("E","69");
 print();
 undef("C");
 print();
 undef("B");
 print();
 return 0;
}
Output from the run:
A-->65 B-->66 C-->67 D-->68 E-->69
A-->65 B-->66 D-->68 E-->69
A-->65 D-->68 E-->69


CALCULATOR BASED ON K & R
/* Calculator based on K & R
  (1) Not working for '(' such as 1+(5-3) 
  (2) Added 
  getop(s);
  push(a2f(s));
 after '+', '-', '*', and '/' to push next number
*/

#include <iostream>
#include <cstring>
#include <cctype>

using namespace std;

int getop(char []);
void push(double);
double pop(void);
int getch(void);
void ungetch(int);
int a2f(char *);
void calc();

int main() 
{
 calc();
 return 0;
}

const int MAXOP = 100;
const char NUMBER = '0';

void calc() {
 int type;
 double op2;
 char s[MAXOP];
  
 while((type = getop(s)) != EOF) {
  switch (type) {
   case NUMBER:
    push(a2f(s));
    break;
   case '+':
    getop(s);
    push(a2f(s));
    push(pop() + pop());
    break;
   case '*':
    getop(s);
    push(a2f(s));
    push(pop() * pop());
    break;
   case '-':
    getop(s);
    push(a2f(s));
    op2 = pop();
    push(pop()-op2);
    break;
   case '/':
    getop(s);
    push(a2f(s));
    op2 = pop();
    if(op2 != 0.0) 
     push(pop() / op2);
    else
     cout << "error: division by zero" << endl;
    break;
   case '\n':
    cout << pop() << endl;
    break;
   default:
    cout << "error: unknown command " << s << endl;
  }
 }
}

// getop(): get next operator or numeric operand 
int getop(char s[]) {
 int i, c;

 while ( (s[0] = c = getch()) == ' ' || c == '\t') 
  ;
 s[1] = '\0';
 if( !isdigit(c) && c != '.')
  return c; // not a number
 i = 0;
 if (isdigit(c)) // collect integer part
  while (isdigit(s[++i] = c = getch()))
   ;
 if (c == '.') // collect fraction
  while (isdigit(s[++i] = c = getch()))
   ;
 s[i] = '\0';
 if (c != EOF )
  ungetch(c);

 return NUMBER;
}

const int BUFSIZE = 100;
char buf[BUFSIZE];
int bufp = 0;

int getch(void) {
 return (bufp > 0) ? buf[--bufp] : cin.get();
}

void ungetch(int c) {
 if (bufp >= BUFSIZE)
  cout << "ungetch(): too many characters\n";
 else
  buf[bufp++] = c;
}

int a2f(char *s) {
 int n = 0;
 while(*s) {
  n *= 10;
  n += (*s++)-'0';
 }
 return n;
}

const int MAXVAL = 100;
int sp = 0;
double val[MAXVAL];

void push(double f) {
 if( sp < MAXVAL )
  val[sp++] = f;
 else
  cout << "error: stack full, can't push "<< f << endl;
}

double pop(void) {
 if (sp > 0)
  return val[--sp];
 else {
  cout << "error: stack empty \n";
  return 0.0;
 }
}


SIZE OF STRUCT
How do we find the number of structures for the given array of structures in the sample code below?
#include <iostream>

using namespace std; 

struct vehicle
{
 int price;
 char type[10];
} car[] = {
 {1,"a"},{2,"b"},{3,"c"}
};

int main() 
{
 cout << "sizeof(car)=" << sizeof(car) << endl;
 cout << "sizeof(*car)=" << sizeof(*car) << endl;
 cout << "sizeof(car)/sizeof(*car)=" << sizeof(car)/sizeof(*car) << endl;
 return 0;
}
Answer is:
sizeof(car)=48
sizeof(*car)=16
sizeof(car)/sizeof(*car)=3
Note that the size of an array of vehicle struct is not 4+1*10 but 16. This is probably due to padding/allignmment:
4 + 1*4 + 1*4 + (1*2+2) = 16
As an exercise, how about the following example:
struct StructA 
{ 
 char ch1; 
 char ch2; 
 int n; 
}; 

struct StructB 
{ 
 char ch1; 
 int n; 
 char ch2; 
}; 

int sizeA = sizeof(StructA); // sizeA = 1 + 1 + (2-padding) + 4 = 8
int sizeB = sizeof(StructB); // sizeB = 1 + (3-padding) + 4 + 1 + (3-padding) = 12
Remember, it is frequently very difficult to predict the sizes of compound datatypes such as a struct or union due to structurepadding. Another reason for using sizeof is readability.


SIZE OF STRUCT & CLASS
#include <iostream>

class con
{
public: 
 struct node
 {
  int data;
  int rest;
 };

 con(){}
};

int main()
{
 con c;
 std::cout << "size of con class =" << sizeof(c);  // size is 1
 return 0;
}
The class is empty, in other words, no members. But the size of the class is 1. Not sure but it seems to be related to the pointer arithmetic(?). However, if we add the following to the structure, things change:
 struct node
 {
  int data;
  int rest;
 } nd;
The the output becomes 4(data)+4(rest)=8. If we use pointer then:
 struct node
 {
  int data;
  int rest;
 } *nd;
the size becomes 4(nd).


REMOVING LEFT SPACE OF A STRING
#include <iostream>
#include <cassert>

using namespace std; 

char *left_space_trim(char *buf, const char *s) {
 assert(buf != NULL);
 assert( s!= NULL);

 while(isspace(*s))
  s++;
 strcpy(buf,s);
 return buf;
}

int main() 
{
 char *srcStr = strdup("  String with left_side spaces");
 char *dstStr = (char*)malloc(strlen(srcStr)+1);
 cout << srcStr << "=>" << left_space_trim(dstStr,srcStr) << endl;

 return 0;
}
Output is:
  String with left_side spaces=>String with left_side spaces



RECTANGLES OVERLAPPING
It is also called OBB/OBB (Oriented Bounding Box) Intersection problem. Here, a brute force method will be presented first, and the other one (separating axes approach) which seems more elegant will come later time.

rectangle 
#include <iostream>

class Point
{
public:
 Point(float a, float b):x(a),y(b){}
 float getX() {return x;}
 float getY() {return y;}
private:
 float x, y;
};

class Rect
{
public:
 Rect(Point a, Point b):ul(a),lr(b){}
 float getHeight() {return ul.getY()-lr.getY();}
 float getWidth() {return lr.getX()-ul.getX();}
 float getXmin() {return ul.getX();}
 float getXmax() {return lr.getX();}
 float getYmin() {return lr.getY();}
 float getYmax() {return ul.getY();}
private:
 Point ul,lr;
};

bool overlapped(Rect &r1, Rect &r2)
{
 float height1 = r1.getHeight();
 float height2 = r2.getHeight();
 float width1 = r1.getWidth();
 float width2 = r2.getWidth();
 // case A: No corners inside 
 if(r1.getWidth() >= r2.getWidth()) {
  if(r2.getXmin() >= r1.getXmin() && r2.getXmax() <= r1.getXmax()
   && r2.getYmin() <= r1.getYmin() && r2.getYmax() >= r1.getYmax() ) 
   return true;
 }
 else {
  if(r1.getXmin() >= r2.getXmin() && r1.getXmax() <= r2.getXmax()
   && r1.getYmin() <= r2.getYmin() && r1.getYmax() >= r2.getYmax() ) 
   return true;
 }

 // case B: r2's corner inside r1 rect? (compare r1 with r2)
 // r2's ul (Xmin, Ymax)
 if(r1.getXmin() <= r2.getXmin() && r1.getXmax() >= r2.getXmin()
   && r1.getYmax() >= r2.getYmax() && r1.getYmin() <= r2.getYmax() ) 
   return true;
 // r2's ur (Xmax, Ymax)
 if(r1.getXmin() <= r2.getXmax() && r1.getXmax() >= r2.getXmax()
   && r1.getYmax() >= r2.getYmax() && r1.getYmin() <= r2.getYmax() ) 
   return true;
 // r2's ll (Xmin, Ymin)
 if(r1.getXmin() <= r2.getXmin() && r1.getXmax() >= r2.getXmin()
   && r1.getYmax() >= r2.getYmin() && r1.getYmin() <= r2.getYmin() ) 
   return true;
 // r2's lr (Xmax, Ymin)
 if(r1.getXmin() <= r2.getXmax() && r1.getXmax() >= r2.getXmax()
   && r1.getYmax() >= r2.getYmin() && r1.getYmin() <= r2.getYmin() ) 
   return true;
 return false;
}

int main()
{
 float x1 = 0.0, y1 = 5.0, x2 = 5.0, y2 = 0.0;
 float x3 = 4.0, y3 = 1.0, x4 = 6.0, y4 = -1.0;

 Point pt1(x1,y1), pt2(x2,y2);
 Point pt3(x3,y3), pt4(x4,y4);

 Rect rta(pt1,pt2);
 Rect rtb(pt3,pt4);

 if(overlapped(rta,rtb)) 
  std::cout << "Overlapped: " << std::endl;
 else
  std::cout << "Not overlapped: " << std::endl;

 return 0;
}



FINDING PAIRS OF INTEGERS WHOSE SUM EQUALS TO A GIVEN SUM
The input an ordered array.
We start to check from both ends, and depending on the sum of the pair, we either increment left index or decrement the right index until they meet.
#include <iostream>
using namespace std;

// input sum
// input array, a
// input size of array, sz
// input & output pairs of integers, pair

int fnd(int sum, int *a, int sz, int *pair)
{
    int i = 0, j = sz-1, index = 0, count = 0;

    while(i < j) {
 if(a[i]+a[j] > sum) 
            j--;
 else if(a[i]+a[j] < sum) 
     i++;
 else {
     pair[index++] = a[i++];
     pair[index++] = a[j--];
     count += 2;
        }
    }
    return count;
}

int main()
{
    int sum =18;
    int a[] = {1, 3, 5, 7, 9, 11, 13, 15, 16};
    int size = sizeof(a)/sizeof(int);
    int *pair = new int[size];

    int count = fnd(sum, a, size, pair);

    for(int i = 0; i < count; i++) cout << pair[i] << " ";

    return 0;
}