diff --git a/src/ich/algo.py b/src/ich/algo.py
index ed8874965d911d95e20dc8ed3a8d44d60ecadae1..bb2abdadc689cd42e58ab8d3971d0f01d974b4bd 100644
--- a/src/ich/algo.py
+++ b/src/ich/algo.py
@@ -73,14 +73,16 @@ def process_st_window(win, temp_cor, temp_lidar, args):
     # if np.all(np.array(parm) == 0):
     #     parm = parm_.copy()
     
-    if np.all(mask == 0) or np.all(np.isnan(mask)):
-        np.fill_diagonal(mask, 1)
-        mask = np.flipud(mask)
-        np.fill_diagonal(mask, 1)
-
-    mask = np.nan_to_num(mask)
-    s = np.full((args.window_size, args.window_size), parm[1]) * mask
-    c = np.full((args.window_size, args.window_size), parm[2]) * mask
+    # if np.all(mask == 0) or np.all(np.isnan(mask)):
+    #     np.fill_diagonal(mask, 1)
+    #     mask = np.flipud(mask)
+    #     np.fill_diagonal(mask, 1)
+
+    # mask = np.nan_to_num(mask)
+    # s = np.full((args.window_size, args.window_size), parm[1]) * mask
+    # c = np.full((args.window_size, args.window_size), parm[2]) * mask
+    s = np.full((args.window_size, args.window_size), parm[1]) 
+    c = np.full((args.window_size, args.window_size), parm[2]) 
     rmse = np.full((args.window_size, args.window_size), parm[4]) 
     count = np.full((args.window_size, args.window_size), 
                     np.count_nonzero(~np.isnan(temp_lidar)))
@@ -186,6 +188,9 @@ def cal_(temp_cor, temp_gedi, htl, htg):
 
                 
                 height[(height >= htg) | (height <= htl)] = np.nan
+
+                inv_N = np.count_nonzero(~np.isnan(height))    
+
                 x, y = temp_gedi.flatten(), height.flatten()
                 valid = ~np.isnan(x) & ~np.isnan(y)
                 x, y = x[valid], y[valid]
@@ -206,14 +211,13 @@ def cal_(temp_cor, temp_gedi, htl, htg):
                                     'r': result[:, 3], 'rmse': result[:, 4]})
         del result
         tempdf.dropna(subset=['rmse'], inplace=True)
-        if nn>6:
-            tempdf = tempdf[tempdf['N'] > 3].sort_values(by=['rmse'], ascending=True)
-
         if tempdf.empty:
-            sCoarse = cCoarse = 0
-        else:
-            sCoarse = np.round(tempdf.iloc[0]['S'], 2)
-            cCoarse = np.round(tempdf.iloc[0]['C'], 2)
+            return [0, 0, 0, 0, 0]        
+        elif nn>6:
+            tempdf_coarse = tempdf[tempdf['N'] > 3].sort_values(by=['rmse'], ascending=True)
+            sCoarse = np.round(tempdf_coarse.iloc[0]['S'], 2)
+            cCoarse = np.round(tempdf_coarse.iloc[0]['C'], 2)
+
             del tempdf
             result = []
             for S_param in np.arange(sCoarse - 0.1, sCoarse + 0.1, 0.02):
@@ -242,11 +246,17 @@ def cal_(temp_cor, temp_gedi, htl, htg):
                                         'r': result[:, 3], 'rmse': result[:, 4]})
             del result
             tempdf.dropna(subset=['rmse'], inplace=True)
-            if nn>6:
-                tempdf = tempdf[tempdf['N'] > 3].sort_values(by=['rmse'], ascending=True)
+            # if nn>6:
+            tempdf = tempdf[tempdf['N'] > 3].sort_values(by=['rmse'], ascending=True)
 
-        if tempdf.empty:
-            return [0, 0, 0, 0, 0]
+            if tempdf.empty and sCoarse==0:
+                return [0, 0, 0, 0, 0]
+            
+            elif tempdf.empty and sCoarse!=0:
+                list(tempdf_coarse.iloc[0])
+            
+            else:
+                return list(tempdf.iloc[0])
         else:
             return list(tempdf.iloc[0])
 
diff --git a/src/ich/args_in.py b/src/ich/args_in.py
index a77b52fc70e580dae7dd407c1c1ebca2a1a9be00..0b1d2716722e87938bc93ba0c444eb3af9e2195d 100644
--- a/src/ich/args_in.py
+++ b/src/ich/args_in.py
@@ -185,34 +185,26 @@ def rvog_inverse(args):
             rmse__=[]
             count = []
             parm_ = [0,0,0,0,0]
-            for win in tqdm(range(np.shape(temp_cor)[0])):
-            # for win in (range(np.shape(temp_cor)[0])):
-                parm = cal_(temp_cor[win],temp_lidar[win],args.htl,args.htg)
-
-                mask = temp_lidar[win].copy()
-                mask[~np.isnan(mask)] = 1 
-                
-                if np.all(np.array(parm) == 0):
-                    parm=parm_.copy()
-                if np.all(mask==0) or np.all(np.isnan(mask)):
-                    # mask[np.shape(mask)[0]//2,np.shape(mask)[1]//2]=1
-                    np.fill_diagonal(mask, 1)
-                    mask = np.flipud(mask)
-                    np.fill_diagonal(mask, 1)
-
-
-                s.append(np.full((args.window_size,args.window_size), parm[1])*mask)
-                c.append(np.full((args.window_size,args.window_size), parm[2])*mask)
-                # s.append(np.full((args.window_size,args.window_size), parm[1]))
-                # c.append(np.full((args.window_size,args.window_size), parm[2]))
+            
 
-                rmse__.append(np.full((args.window_size,args.window_size), parm[4])) 
+            batch_size = 100  # Define your batch size
+            num_windows = np.shape(temp_cor)[0]
+            results = [None] * num_windows  # Preallocate a list to hold results
 
-                count.append(np.count_nonzero(~np.isnan(temp_lidar[win,:,:])))
-                gama = temp_cor[win] / parm[1]
-                ht_.append(arc_sinc(gama, parm[2])*mask)
+            with concurrent.futures.ProcessPoolExecutor() as executor:
+                futures = {}
+                for start in range(0, num_windows, batch_size):
+                    end = min(start + batch_size, num_windows)
+                    for win in range(start, end):
+                        futures[executor.submit(process_st_window, win, temp_cor[win], temp_lidar[win], args)] = win
+                        
+                for future in tqdm(concurrent.futures.as_completed(futures),total=len(futures)):
+                    win_index = futures[future]  # Get the corresponding index
+                    result = future.result()
+                    results[win_index] = result  # Store result directly at the corresponding index
 
-                parm_ = parm.copy()
+            # Unpack results
+            s, c, rmse__, count, ht_ = zip(*results)
 
 
             s = emp.merge_patches(s, indices__,mode='max')
@@ -220,7 +212,7 @@ def rvog_inverse(args):
             rmse__ = emp.merge_patches(rmse__, indices__,mode='max')
             ht_ = emp.merge_patches(ht_, indices__,mode='max')
             temp_cor = emp.merge_patches(temp_cor, indices__,mode='max')
-            count = emp.merge_patches(count, indices__,mode='mean')
+            count = emp.merge_patches(count, indices__,mode='avg')
         
 
         elif args.algo==3:
@@ -256,10 +248,153 @@ def rvog_inverse(args):
             c = c*temp_mask
 
             temp_cor = cor.copy()
+        elif args.algo==4:   
+            temp_cor = blockshaped(cor, args.window_size, args.window_size)
+            temp_lidar = blockshaped(lidar_ht_cal, args.window_size, args.window_size)
+
+            batch_size = 100  # Define your batch size
+            num_windows = np.shape(temp_cor)[0]
+            results = [None] * num_windows  # Preallocate a list to hold results
+
+            with concurrent.futures.ProcessPoolExecutor() as executor:
+                futures = {}
+                for start in range(0, num_windows, batch_size):
+                    end = min(start + batch_size, num_windows)
+                    for win in range(start, end):
+                        futures[executor.submit(process_st_window, win, temp_cor[win,:,:], temp_lidar[win,:,:], args)] = win
+                        
+                for future in tqdm(concurrent.futures.as_completed(futures),total=len(futures)):
+                    win_index = futures[future]  # Get the corresponding index
+                    result = future.result()
+                    results[win_index] = result  # Store result directly at the corresponding index
+
+            # Unpack results
+            sw, cw, rmse__, count, ht_ = zip(*results)
+            del results
+            print('20')
+            args.window_size = 20
+            cor20 = blockshaped(cor, args.window_size, args.window_size)
+            lidar20 = blockshaped(lidar_ht_cal, args.window_size,args.window_size)
+            print('20 bolcked')
+            batch_size = 100  # Define your batch size
+            num_windows = np.shape(cor20)[0]
+            results = [None] * num_windows  # Preallocate a list to hold results
+
+            with concurrent.futures.ProcessPoolExecutor() as executor:
+                futures = {}
+                for start in range(0, num_windows, batch_size):
+                    end = min(start + batch_size, num_windows)
+                    for win in range(start, end):
+                        futures[executor.submit(process_st_window, win, cor20[win,:,:], lidar20[win,:,:], args)] = win
+                        
+                for future in tqdm(concurrent.futures.as_completed(futures),total=len(futures)):
+                    win_index = futures[future]  # Get the corresponding index
+                    result = future.result()
+                    results[win_index] = result  # Store result directly at the corresponding index
+
+            # Unpack results
+            s20, c20, _, _, _ = zip(*results)
+            del results
+            print('50')
+            args.window_size = 50
+            cor50 = blockshaped(cor, 50, 50)
+            lidar50 = blockshaped(lidar_ht_cal, 50,50)
+
+            batch_size = 100  # Define your batch size
+            num_windows = np.shape(cor50)[0]
+            results = [None] * num_windows  # Preallocate a list to hold results
 
+            with concurrent.futures.ProcessPoolExecutor() as executor:
+                futures = {}
+                for start in range(0, num_windows, batch_size):
+                    end = min(start + batch_size, num_windows)
+                    for win in range(start, end):
+                        futures[executor.submit(process_st_window, win, cor50[win,:,:], lidar50[win,:,:], args)] = win
+                        
+                for future in tqdm(concurrent.futures.as_completed(futures),total=len(futures)):
+                    win_index = futures[future]  # Get the corresponding index
+                    result = future.result()
+                    results[win_index] = result  # Store result directly at the corresponding index
+
+            # Unpack results
+            s50, c50, _, _, _ = zip(*results)
+
+
+
+
+            s20 = unblockshaped(np.array(s20), rows,cols)
+            c20 = unblockshaped(np.array(c20), rows,cols)
+            s50 = unblockshaped(np.array(s50), rows,cols)
+            c50 = unblockshaped(np.array(c50), rows,cols)
+
+
+            sw = unblockshaped(np.array(sw), rows,cols)
+            cw = unblockshaped(np.array(cw), rows,cols)
+            rmse__ = unblockshaped(np.array(rmse__), rows,cols)
+            ht_ = unblockshaped(np.array(ht_), rows,cols)
+            count = unblockshaped(np.array(count), rows,cols)            
+            temp_cor = unblockshaped(temp_cor, rows,cols)
+
+            # # s = np.nanmean(np.dstack([sw,s20,s50])
+
+            # s = np.nanmean(np.stack((sw,s20,s50), axis=0), axis=0)
+            # c = np.nanmean(np.stack((cw,c20,c50), axis=0), axis=0)
+
+
+            temp_lidar = blockshaped(lidar_ht_cal, 10, 10)
+            sw = blockshaped(sw, 10, 10)
+            cw = blockshaped(cw, 10, 10)
+
+            s20 = blockshaped(s20, 10, 10)
+            c20 = blockshaped(c20, 10, 10)
+            
+            s50 = blockshaped(s50, 10, 10)
+            c50 = blockshaped(c50, 10, 10)
+
+            s = np.zeros(temp_lidar.shape)
+            c = np.zeros(temp_lidar.shape)
+            temp_mask = np.zeros(temp_lidar.shape)
+
+
+
+            for win in tqdm(range(np.shape(temp_lidar)[0])):
+                mask = temp_lidar[win,:,:].copy()
+                mask[~np.isnan(mask)] = 1 
+                temp_mask[win,:,:] = mask
+                s[win,:,:] = sw[win,:,:]
+                c[win,:,:] = cw[win,:,:]
+
+
+
+                if np.all(temp_lidar[win,:,:]==0) or np.all(np.isnan(temp_lidar[win,:,:])):
+                    mask = np.zeros(temp_mask[win,:,:].shape)
+                    # mask[np.shape(mask)[0]//2,np.shape(mask)[1]//2]=1
+                    np.fill_diagonal(mask, 1)
+                    mask = np.flipud(mask)
+                    np.fill_diagonal(mask, 1)
+                    temp_mask[win,:,:] = mask
+                    s[win,:,:] = np.nanmean(np.stack((sw[win,:,:],s20[win,:,:],s50[win,:,:]), axis=0), axis=0)
+                    c[win,:,:] = np.nanmean(np.stack((cw[win,:,:],c20[win,:,:],c50[win,:,:]), axis=0), axis=0)
+            
+            # del temp_lidar,sw,s20,s50,cw,c20,c50
+
+            temp_mask = unblockshaped(temp_mask, rows,cols)
+            s = unblockshaped(np.array(s), rows,cols)
+            c = unblockshaped(np.array(c), rows,cols)
+
+
+            s = s*temp_mask
+            c = c*temp_mask
+
+
+
+
+            # c = np.nanmean([sw,c20,c50])
         else:
             raise ValueError('Invalid algorithm type!')
 
+
+
         ht_[ht_==0] = np.nan
         s[s==0]=np.nan
         c[c==0]=np.nan